/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, SCAI.LAB                        *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Oct 97                                                   *
*  Last Update : Jan 98                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : context                                                  *
*                                                                         *
*  Function: Definition of active processor subsets and the correspon-    *
*            ding default processor topologies for them                   *
*            (needed for TASK PARALLELISM in data parallel language)      *
*                                                                         *
*  Import:                                                                *
*  =======                                                                *
*                                                                         *
*     USE topology                                                        *
*                                                                         *
**************************************************************************/

#include <stdio.h>
#include "dalib.h"

#undef DEBUG
#define CHECK

/**************************************************************************
*                                                                         *
*   Definition of the Data Structures for Context                         *
*                                                                         *
*    - a context (active processor subset) has a default                  *
*      topology for every rank                                            *
*                                                                         *
**************************************************************************/

     /**********************************************************
     *                                                         *
     *  Stack of topology context (active processors)          *
     *                                                         *
     *  - active topology in default_topologies[contex_rank-1] *
     *                                                         *
     **********************************************************/

typedef struct

  { int context_group;                   /* active processors subgroup      */
    int context_rank;                    /* rank of active processors shape */
    int default_topologies [MAX_RANK];   /* topologies of different ranks   */

  } topology_context;

#define MAX_CONTEXTS 30

static topology_context topology_stack [MAX_CONTEXTS];
static int              topology_stack_top = 0;      /* current stack top   */

static int              topology_stack_max = 0;      /* maximal stack top   */

static int              serial_context_set = 0;      /* not computed so far */
static topology_context serial_context;  /* special context if I am the
                                            only active processor        */

/**************************************************************************
*                                                                         *
*  void dalib_push_context (int rank, default_topologies[MAX_RANK])       *
*                                                                         *
*  - define a new context with already define default topologies          *
*                                                                         *
**************************************************************************/

static void dalib_push_context (context_rank, default_topologies)

int context_rank;
int default_topologies [];

{ topology_context *c;

  int rank;
  int topid;

  if (topology_stack_top >= MAX_CONTEXTS)

     { char msg[100];
       sprintf (msg, "%d: push_context: too many contexts (max = %d)",
                      pcb.i, MAX_CONTEXTS);
       dalib_internal_error (msg);
       dalib_stop ();
     }

  c = topology_stack + topology_stack_top;

  c->context_rank  = context_rank;
  c->context_group = dalib_top_group (default_topologies[0]);

  /* set the default topologies */

  for (rank=1; rank<=MAX_RANK; rank++)

    c->default_topologies[rank-1] = default_topologies[rank-1];

  /* push new entry on the stack */

  topology_stack_top++;

  /* set stack max if top has achieved never maximal value */

  if (topology_stack_top > topology_stack_max) 
      topology_stack_max = topology_stack_top;

#ifdef DEBUG
  printf ("%d: new context %d (group=%d) defined with ", 
          pcb.i, topology_stack_top, c->context_group);

  for (rank=1; rank<=MAX_RANK; rank++)
    { topid = default_topologies [rank-1]; 
      printf ("%d (rank=%d,size=%d) ", 
               topid, dalib_top_rank(topid), dalib_top_size(topid));
    }
  printf ("\n");
#endif

} /* dalib_push_context */

/**************************************************************************
*                                                                         *
*  void dalib_pop_context ()                                              *
*                                                                         *
**************************************************************************/

void dalib_pop_context ()

{ if (topology_stack_top <= 0) 

      { dalib_internal_error ("dalib_pop_context: no more context on stack");
        dalib_stop ();
      }

  topology_stack_top--;

} /* dalib_pop_context */

/**************************************************************************
*                                                                         *
*  topology_context *dalib_top_context ()                                 *
*                                                                         *
**************************************************************************/

static topology_context *dalib_top_context ()

{ if (topology_stack_top <= 0) 

      { dalib_internal_error ("dalib_top_context: no context on stack");
        dalib_stop ();
      }

  return topology_stack + topology_stack_top - 1;

} /* dalib_top_context */

static topology_context *dalib_main_context ()

{ if (topology_stack_top <= 0) 

      { dalib_internal_error ("dalib_main_context: no context defined");
        dalib_stop ();
      }

  return topology_stack;

} /* dalib_main_context */

/**************************************************************************
*                                                                         *
*  void dalib_factorize (int N, int k, int vals[])                        *
*                                                                         *
*   - N = vals[0] * vals[1] * ... * vals[k-1]                             *
*                                                                         *
*   - vals[0] <= vals[1] <= vals[2] <= ... <= vals[k-1]                   *
*                                                                         *
*   - 1 * 1 * ... N is always a solution but obviously not the best one   *
*                                                                         *
*   - needed to make default topologies                                   *
*                                                                         *
**************************************************************************/

#undef DEBUG

void dalib_factorize (N, k, vals)

int N, k, vals[];

{ int n1, n2;
  int hv [MAX_RANK];
  int i;

  if (k < 1)

    { dalib_internal_error ("factorize");
      dalib_stop ();
    }

  if (k == 1)

    { vals[0] = N;
      return;
    }


  n1 = 1;
  n2 = N;

  while (n1 <= n2)

     { if (n1 * n2 == N)

          { dalib_factorize (n2, k-1, hv);

            if (n1 <= hv[0])   /* this is a correct solution */

               { vals[0] = n1;
                 for (i=0; i<k-1; i++)
                    vals[i+1] = hv[i];
               }

          }
       n1 ++;
       n2 = N / n1;
     }

  /* note : at least one solution exists : 1 * 1 * .. * N */

#ifdef DEBUG
  printf ("dalib_factorize : N = %d, rank = %d, factors : %d %d %d\n",
           N, k, vals[0], vals[1], vals[2]);
#endif

} /* dalib_factorize */

/**************************************************************************
*                                                                         *
*  void dalib_make_torus (int N, int k, int vals[])                        *
*                                                                         *
*   - vals[0] x vals[1] x ... x vals[k-1] is a torus specification        *
*                                                                         *
*   - some values that are zero will be filled up with good values        *
*                                                                         *
**************************************************************************/

void dalib_make_torus (N, k, vals)

int N, k, vals[];

{ int hv [MAX_RANK];

  int pos, found;
  int i, n1;

#ifdef DEBUG
  printf ("dalib_make_torus : N = %d, rank = %d, shape = %d %d %d\n",
           N, k, vals[0], vals[1], vals[2]);
#endif

  if (k < 1)
    { dalib_internal_error ("make_torus");
      dalib_stop ();
    }

  if (k == 1)

    { if (vals[0] <= 0) vals[0] = N;
      return;
    }


  /* find a non-zero factor */

  pos   = 0;
  found = 0;

  while (!found && (pos < k))

    { found = vals[pos] > 0;
      if (!found) pos++;
    }

  if (!found)

     { /* specification is 0 x 0 x ... x 0, we factorize */

       dalib_factorize (N, k, vals);
       return;
     }

  /* remove non-zero factor and build hv */

  n1 = vals[pos];

  for (i=0; i<k; i++)

    { if (i < pos) hv[i]   = vals[i];
      if (i > pos) hv[i-1] = vals[i];
    }

  /* recursive call */

  dalib_make_torus (N/n1, k-1, hv);

  /* rebuild v, note that vals[pos] == n1 */

  for (i=0; i<k; i++)

    { if (i < pos)  vals[i] = hv[i];
      if (i > pos)  vals[i] = hv[i-1];
    }

#ifdef DEBUG
  printf ("dalib_make_torus : N = %d, rank = %d, shape = %d %d %d\n",
           N, k, vals[0], vals[1], vals[2]);
#endif

} /* dalib_make_torus */

/**************************************************************************
*                                                                         *
*  void dalib_push_main_context ()                                        *
*                                                                         *
*  - defines a new context for the main program                           *
*                                                                         *
**************************************************************************/

void dalib_push_main_context ()

{ int default_topologies [MAX_RANK];

  int main_group;
  int main_rank;                     /* rank of main topology */
  int N;

  int shape[MAX_RANK];
  int topology;

  int i, rank, rank1;

  main_group = dalib_group_all ();   /* group of all processores */
  N          = pcb.p;

  /* try to get default topologies from the process control block */

  shape[0] = pcb.p;
  default_topologies[0] = dalib_top_make (main_group, 1, shape);

  shape[0] = pcb.p2_1;
  shape[1] = pcb.p2_2;
  dalib_make_torus (N, 2, shape);
  default_topologies[1] = dalib_top_make (main_group, 2, shape);

  shape[0] = pcb.p3_1;
  shape[1] = pcb.p3_2;
  shape[2] = pcb.p3_3;
  dalib_make_torus (N, 3, shape);
  default_topologies[2] = dalib_top_make (main_group, 3, shape);

  main_rank = pcb.p_rank;
  rank1 = 4;

  /* define topologies for rank = rank1, ... , MAX_RANK */

  for (rank = rank1; rank <= MAX_RANK; rank++)

    { /* define a new topology for rank */

      for (i=0; i<rank; i++) shape[i] = 0;

      dalib_make_torus (N, rank, shape);

      topology = dalib_top_make (main_group, rank, shape);

      default_topologies[rank-1] = topology;

    }

  dalib_push_context (main_rank, default_topologies);

} /* dalib_push_main_context */

/**************************************************************************
*                                                                         *
*  void dalib_push_serial_context ()                                      *
*                                                                         *
*  - push context for a single processor                                  *
*                                                                         *
**************************************************************************/

void dalib_push_serial_context ()

{ /* check that the serial context has already been defined */

  if (serial_context_set == 0)

     { int i, rank;
       int shape[MAX_RANK];
       int serial_group;
       int topology;

       /* create a processor group that contains only me */

       serial_group = dalib_group1_create (1, pcb.i, 1);

       for (i=0; i<MAX_RANK; i++) shape[i] = 1;

       for (rank=1; rank <= MAX_RANK; rank++)

          { topology = dalib_top_make (serial_group, rank, shape);
            serial_context.default_topologies[rank-1] = topology;
          }
 
       serial_context.context_rank = 1;
       serial_context_set = 1;
     }

   dalib_push_context (serial_context.context_rank, 
                       serial_context.default_topologies);

} /* dalib_push_serial_context */

/**************************************************************************
*                                                                         *
*  void dalib_push_top_context (int topology)                             *
*                                                                         *
*  - topology becomes new context                                         *
*  - make new default topologies with this                                *
*                                                                         *
**************************************************************************/

void dalib_push_top_context (topology)

int topology;

{ int default_topologies [MAX_RANK];

  int top_rank;
  int top_size;
  int top_group;

  int i, rank;

  int shape [MAX_RANK];
  int new_topology;

  top_rank  = dalib_top_rank (topology);
  top_size  = dalib_top_size (topology);
  top_group = dalib_top_group (topology);

  /* get a default topology for every rank */

  for (rank=1; rank<=MAX_RANK; rank++)

    { if (rank == top_rank)

         default_topologies[rank-1] = topology;

       else

         { /* define a new topology for rank */

           for (i=0; i<rank; i++) shape[i] = 0;

           dalib_make_torus (top_size, rank, shape);

           new_topology = dalib_top_make (top_group, rank, shape);

           default_topologies[rank-1] = new_topology;

         }

    } /* for all ranks */

  /*  define the new context with the default topologies  */

  dalib_push_context (top_rank, default_topologies);

}  /* dalib_push_top_context */

/**************************************************************************
*                                                                         *
*  DALIB query routines:                                                  *
*                                                                         *
*  int dalib_context_group ()                                             *
*                                                                         *
*    - returns group identifier of all active processors                  *
*                                                                         *
*  int dalib_context_nproc ()                                             *
*                                                                         *
*    - returns number of processors in the active processor subset        *
*                                                                         *
*  int dalib_context_pid ()                                               *
*                                                                         *
*    - returns current processor id in active topology (1 <= pid <= N)    *
*                                                                         *
**************************************************************************/

int dalib_context_group ()

{ topology_context *current_context;

  current_context = dalib_top_context ();

  return current_context->context_group;

} /* dalib_context_group */

int dalib_context_nprocs ()

{ topology_context *current_context;

  current_context = dalib_top_context ();

  return dalib_group_size (current_context->context_group);

} /* dalib_context_nprocs */

int dalib_context_pid ()

{ topology_context *current_context;

  current_context = dalib_top_context ();

  return dalib_group_position (current_context->context_group, pcb.i);

} /* dalib_context_pid */

/**************************************************************************
*                                                                         *
*  DALIB query routines:                                                  *
*                                                                         *
*  int dalib_context_default_top (rank)                                   *
*                                                                         *
*    - returns default topology for a certain rank in the current context *
*                                                                         *
**************************************************************************/

int dalib_context_default_top (rank) 

int rank;

{ topology_context *current_context;

  int topology;

  current_context = dalib_top_context ();

  if (rank >= 1)
    topology = current_context->default_topologies[rank-1];
   else if (rank == 0)
    topology = current_context->default_topologies[0];
   else if (rank == -1)
    topology = rank;
   else if (rank == -2)
    topology = rank;  /* underspecified topology */
   else
    { dalib_internal_error ("default topology ???");
      dalib_stop ();
    }

#ifdef DEBUG
  printf ("%d: default topology in context is top = %d (rank=%d,size=%d)\n",
          pcb.i, topology, dalib_top_rank(topology), dalib_top_size(topology));
#endif

  return (topology);

} /* dalib_context_default_top */

/**************************************************************************
*                                                                         *
*  FORTRAN EXPORT                                                         *
*                                                                         *
*  INTEGER NUMBER_OF_PROCESSORS ([DIM])     ! HPF Intrinsic               *
*  INTEGER ACTIVE_NUM_PROCS     ([DIM])     ! HPF Extended Intrinsic      *
*                                                                         *
**************************************************************************/

          /**************************************************
          *   NUMBER_OF_PROCESSORS                          *
          **************************************************/

int FUNCTION(dalib_all_procs) (idim)
int *idim;

{ topology_context *main_context;
  int              main_topology;
  int              main_rank;

  main_context  = dalib_main_context ();
  main_rank     = main_context->context_rank;
  main_topology = main_context->default_topologies[main_rank-1];


  if (FUNCTION(dalib_present)(idim))

     { int dim, NP, NId;

       dim = *idim;

       if (dim < 1) return (0);
       if (dim > main_rank) return (0);

       dalib_top_info (main_topology, *idim, &NP, &NId);

       return (NP);
     }

    else /* return number of all processors */

     return (dalib_top_size (main_topology));

} /* FUNCTION(dalib_all_procs) */

          /**************************************************
          *   ACTIVE_NUM_PROCS ([IDIM])                     *
          **************************************************/

void FUNCTION(dalib_active_num_procs) (active, idim)

int *active;
int *idim;

{ topology_context *current_context;
  int              current_topology;
  int              current_rank;

  current_context  = dalib_top_context ();
  current_rank     = current_context->context_rank;
  current_topology = current_context->default_topologies[current_rank-1];


  if (FUNCTION(dalib_present)(idim))

     { int dim, NP, NId;

       dim = *idim;

       if (dim < 1)
          *active = 0;
       else if (dim > current_rank)
          *active = 0;
       else
        { dalib_top_info (current_topology, dim, &NP, &NId);
          *active = NP;
        }
     }

    else /* return number of active processors */

       *active = dalib_top_size (current_topology);

} /* FUNCTION(dalib_active_num_procs) */

          /**************************************************
          *   PROCESSORS_SHAPE ()                           *
          **************************************************/

void FUNCTION(dalib_processors_shape) (shape_data, shape_dsp)

int shape_data[];
section_info shape_dsp;

{ topology_context *main_context;
  int              main_topology;
  int              main_rank;
  int shape[MAX_RANK];

  main_context  = dalib_main_context ();
  main_rank     = main_context->context_rank;
  main_topology = main_context->default_topologies[main_rank-1];

  dalib_top_shape (main_topology, shape);

  dalib_replicate_out (shape, sizeof(int)*main_rank, 
                       shape_data, shape_dsp);

} /* dalib_processors_shape */

          /**************************************************
          *   ACTIVE_PROCS_SHAPE ()                         *
          **************************************************/

void FUNCTION(dalib_active_procs_shape) (shape_data, shape_dsp)

int shape_data[];
section_info shape_dsp;

{ topology_context *current_context;
  int              current_topology;
  int              current_rank;
  int              shape[MAX_RANK];

  current_context  = dalib_top_context ();
  current_rank     = current_context->context_rank;
  current_topology = current_context->default_topologies[current_rank-1];

  dalib_top_shape (current_topology, shape);

  dalib_replicate_out (shape, sizeof(int)*current_rank, 
                       shape_data, shape_dsp);

} /* dalib_active_procs_shape */

          /***********************************************
          *  PROCESSORS_RANK ()                          *
          ***********************************************/

int FUNCTION(dalib_processors_rank) ()

{ topology_context *main_context;
  int              main_rank;

  main_context  = dalib_main_context ();
  main_rank     = main_context->context_rank;

  return (main_rank);

} /* FUNCTION(dalib_processors_rank) */

          /***********************************************
          *  ACTIVE_PROCS_RANK ()                        *
          ***********************************************/

int FUNCTION(dalib_active_procs_rank) ()

{ topology_context *current_context;
  int              current_rank;

  current_context  = dalib_top_context ();
  current_rank     = current_context->context_rank;

  return (current_rank);

} /* FUNCTION(dalib_active_procs_rank) */

          /**************************************************
          *   DALIB_PID ()   - ADAPTOR use                  *
          **************************************************/

int FUNCTION(dalib_pid) ()

{
   return (dalib_context_pid());

} /* FUNCTION(dalib_pid) */


          /***********************************************
          *                                              *
          *  INTEGER MY_PROCESSOR   ! HPF Local Library  *
          *                                              *
          ***********************************************/

int FUNCTION(dalib_my_processor) ()

{
   return (pcb.i-1);

} /* FUNCTION(dalib_my_processor) */

/**************************************************************************
*                                                                         *
*  Defining NEW CONTEXT                                                   *
*                                                                         *
**************************************************************************/

void FUNCTION(dalib_push_procs_context) (topology)

int *topology;

{ 
  dalib_push_top_context (*topology);

} /* dalib_push_procs_context */

void FUNCTION(dalib_pop_procs_context) ()

{
  dalib_pop_context ();
}

void FUNCTION(dalib_push_host_context) ()

{ 
   dalib_push_serial_context ();

} /* dalib_push_host_context */

void FUNCTION(dalib_pop_host_context) ()

{
   dalib_pop_context ();
}

/**************************************************************************
*                                                                         *
*  void dalib_push_local_context ()                                       *
*  void dalib_pop_local_context ()                                        *
*                                                                         *
*   - switching from the global context to the local context and          *
*     vice versa                                                          *
*                                                                         *
**************************************************************************/

void FUNCTION(dalib_push_local_context) ()

{  /* enable tasking between processors of current context */

   dalib_start_local_tasking (dalib_context_group ());

   dalib_push_serial_context ();

} /* dalib_push_local_context */

void FUNCTION(dalib_pop_local_context) ()

{  dalib_pop_context ();

   /* disable tasking between processors of current context */

   dalib_stop_local_tasking ();

} /* dalib_pop_local_context */
