/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, SCAI.LAB                        *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Jul 95                                                   *
*  Last Update : Sep 95                                                   *
*                                                                         *
*  This Module is part of the extended DALIB                              *
*                                                                         *
*  Module      : scalapack.c                                              *
*                                                                         *
*  Function    : Interface to ScaLAPACK                                   *
*                                                                         *
*  Export : FORTRAN Interface                                             *
*                                                                         *
*  a) PBLCAS interface                                                    *
*                                                                         *
*  void hpf_dgemm_ (char *transa, char *transb, double *alpha,            *
*                   section_id *a_id, section_id *b_id, double *beta,     *
*                   section_id *c_id)                                     *
*                                                                         *
*  b) ScaLAPACK interface                                                 *
*                                                                         *
*  until now the following routines are available:                        *
*                                                                         *
*  void hpf_dgetrf_ (section_id *a_id, double *ipiv, int *info)           *
*  void hpf_dgetri_ (section_id *a_id, double *ipiv, int *info)           *
*  void hpf_dgetrs_ (char *trans, section_id *a_id, double *ipiv,         *
*                    section_id *b_id, int *info)                         *
*                                                                         *
*  void hpf_dpotrf_ (char *uplo, section_id *a_id, int *info)             *
*                                                                         *
*  void hpf_dgeqrf_ (section_id *a_id, double *tau, int *info)            *
*                                                                         *
**************************************************************************/

#define DEBUG

#include "dalib.h"
#include <stdio.h>

/*******************************************************************
*                                                                  *
*  MACRO CONSTANTS  for the DESCRIPTORS                            *
*                                                                  *
*******************************************************************/

#define    M_           0                      /* Global Number of Rows */
#define    N_           1                   /* Global Number of Columns */
#define    MB_          2                          /* Row Blocking Size */
#define    NB_          3                       /* Column Blocking Size */
#define    RSRC_        4                     /* Starting Processor Row */
#define    CSRC_        5                  /* Starting Processor Column */
#define    CTXT_        6                              /* BLACS context */
#define    LLD_         7                    /* Local Leading Dimension */

/*******************************************************************
*                                                                  *
*  INTERFACE BLACS / DALIB - Topologies                            *
*                                                                  *
*    -> realize the mapping between processor arrays in            *
*       the DALIB and the ConTxt in BLACS                          *
*                                                                  *
*******************************************************************/

# define MAX_TOPS   20               /* maximal number of topologies in DALIB */
# define NO_CONTEXT -2

static int ContextArray [MAX_TOPS];  /* integer handles for BLACS context */
static int ContextCounter = -1;

/*******************************************************************
*                                                                  *
*  int dalib_define_grid (nprow, npcol)                            *
*                                                                  *
*  - define a new BLACS context for grid nprow x npcol             *
*  - column-major natural ordering                                 *
*  - returns integer handle to the created BLACS context           *
*                                                                  *
*******************************************************************/

static int dalib_define_grid (nprow, npcol)
int nprow, npcol;

{ int  what;         /* integer to define what to ask for     */
  char order[2];     /* used for gridinit                     */
  int  ConTxt;       /* integer handle returned from BLACS    */

  order[0] = 'c';  /* column-oriented */

  /* context has not been set until now */

#ifdef DEBUG
  printf ("create grid %d x %d\n", nprow, npcol);
#endif

  what = 0;   /* handle indicating default system context */
  blacs_get_ (&what, &what, &ConTxt);  /* usually ConTxt will be -1 */
  blacs_gridinit_ (&ConTxt, order, &nprow, &npcol);

  return (ConTxt);

} /* dalib_define_grid */

/*******************************************************************
*                                                                  *
*  void dalib_set_topology_context (top_id)                        *
*                                                                  *
*  - defines for a DALIB topology the corresponding BLACS context  *
*  - integer handle is accessible via array ContextArray           *
*                                                                  *
*******************************************************************/

static void dalib_set_topology_context (top_id)
int top_id;

{ int rank;
  int nprow, npcol;
  int dummy_pos;

  rank = dalib_top_rank (top_id);

  /* currently we can define grids only for two-dimensional topologies */
  if (rank != 2) return;

  dalib_top_info (top_id, 1, &nprow, &dummy_pos);
  dalib_top_info (top_id, 2, &npcol, &dummy_pos);

  ContextArray [top_id] = dalib_define_grid (nprow, npcol);

} /* dalib_set_topology_context */

/*******************************************************************
*                                                                  *
*  int dalib_get_topololgy_context (top_id)                        *
*                                                                  *
*   - returns BLACS context for topology identification            *
*                                                                  *
*******************************************************************/

static int dalib_get_topology_context (top_id)
int top_id;

{ int i;

  /* fill up positions in the ContextArray */

  if (ContextCounter == -1)

    { for (i=0; i<MAX_TOPS; i++)
          ContextArray [top_id] = NO_CONTEXT;
      ContextCounter = MAX_TOPS;
    }

  if (ContextArray[top_id] == NO_CONTEXT)
     dalib_set_topology_context (top_id);

  /* could not define a topology */

  if (ContextArray[top_id] == NO_CONTEXT)

     { dalib_internal_error ("could not define BLACS context for topology");
       dalib_stop ();
     }

  return (ContextArray [top_id]);

} /* dalib_get_topology_context */

/*******************************************************************
*                                                                  *
*  void dalib_get_global_array_size2 (array_id, m, n, ia, ja)      *
*                                                                  *
*  - make sure that section_id stands for two-dimensional section  *
*  - section_id corresponds to A(ia:ia+m-1,ja:ja+n-1)              *
*                                                                  *
*******************************************************************/

static void dalib_get_global_array_size2 (array_id, m, n, ia, ja)
array_info array_id;
int *m, *n, *ia, *ja;

{ DimInfo    *dims;
  int        rank;

  /* array_id is a full array */

  rank = array_id->rank;

  if (rank != 2) 
    { dalib_internal_error ("not a 2-dimensional array");
      dalib_stop ();
    }

  dims = array_id->dimensions;
  *m = dims->global_size[1] - dims->global_size[0] + 1;
  dims++;
  *n = dims->global_size[1] - dims->global_size[0] + 1;

  *ia = 1;
  *ja = 1;

#ifdef DEBUG
  printf ("get array size 2, m = %d, n = %d, ia = %d, ja = %d\n",
           *m, *n, *ia, *ja);
#endif

} /* dalib_get_global_array_size2 */

/*******************************************************************
*                                                                  *
*  void dalib_get_global_section_size2 (section_id, m, n, ia, ja)  *
*                                                                  *
*******************************************************************/

static void dalib_get_global_section_size2 (section_id, m, n, ia, ja)

section_info section_id;
int          *m, *n;
int          *ia, *ja;

{ array_info array_id;
  SecDimInfo *sdims;
  int        rank;
  int        error;

  array_id = section_id->array_id;
  rank = array_id->rank;

  if (rank != 2) 
    { dalib_internal_error ("not a 2-dimensional array");
      dalib_stop ();
    }

  sdims = section_id->dimensions;

  error = 0;
  if (!sdims->is_range) error = 1;
  if (sdims->global_range[2] != 1) error = 2;
  *m = sdims->global_range[1] - sdims->global_range[0] + 1;
  *ia = sdims->global_range[0];
  sdims++;
  if (!sdims->is_range) error = 1;
  if (sdims->global_range[2] != 1) error = 2;
  *n = sdims->global_range[1] - sdims->global_range[0] + 1;
  *ja = sdims->global_range[0];

  if (error == 1)
    { dalib_internal_error ("not a real section");
      dalib_stop ();
    }

  if (error == 2)
    { dalib_internal_error ("section has stride != 1");
      dalib_stop ();
    }

} /* get_global_section_size2 */

/*******************************************************************
*                                                                  *
*  void dalib_scala_dim_info (array_id, dim, n, nb, topid)         *
*                                                                  *
*  - input: array_id for distributed array, dim for dimension      *
*  - output:  n  is number of elements in the distributed dim      *
*             nb is blocking size                                  *
*             topid is topology to which array is mapped           *
*                                                                  *
*******************************************************************/

void dalib_scala_dim_info (array_id, dim, n, nb, top_id)

array_info array_id;
int        dim;       /* dim = 1 : rows, dim = 2 : columns */

int        *n;        /* number of elements in the dimension */
int        *nb;       /* blocking factor                     */
int        *top_id;

{ int base, str, lb, ub, kind;
  int mypos;
  int nproc;
  int top_dim;
 
  dalib_array_dim_info (array_id, dim, top_id, &top_dim,
                        &base, &str, &lb, &ub, &kind);

  *n = (ub - lb + 1);
 
  if (kind == kSERIAL_DIM)
     { nproc = 1; *nb = *n; }
   else
     { dalib_top_info (*top_id, top_dim, &nproc, &mypos);
       *nb = (*n + nproc - 1) / nproc;
     }

} /* dalib_scala_dim_info */

/*******************************************************************
*                                                                  *
*  dalib_get_array_info (array_id, A, desc_A)                      *
*                                                                  *
*  DALIB descriptor -> BLACS descriptor translation                *
*                                                                  *
*  -> desc_A is description vector associated with 2D array a_id   *
*  -> A itself will be pointer to local data                       *
*                                                                  *
*******************************************************************/

void dalib_get_array_info (a_id, A, desc_A)

array_info a_id;
char       **A;
int        desc_A[];

{ int context;

  int first, total[MAX_DIMENSIONS+1];
  int m, n;
  int mb, nb;
  int top_id;

  dalib_array_addressing (a_id, pcb.i, A, &first, total);

  dalib_scala_dim_info (a_id, 1, &m, &mb, &top_id);
  dalib_scala_dim_info (a_id, 2, &n, &nb, &top_id);

  context = dalib_get_topology_context (top_id);

  desc_A[M_]    = m;  /* number of rows in the distributed matrix */
  desc_A[N_]    = n;  /* number of columns in the distributed matrix */

  desc_A[MB_]   = mb;  /* blocking factor for the rows */
  desc_A[NB_]   = nb;  /* blocking factor for the columns */

  /* in HPF we start always with the first processor      */

  desc_A[RSRC_] = 0;  /* first row of processor array    */
  desc_A[CSRC_] = 0;  /* first column of processor array    */

  desc_A[CTXT_] = context;   /* BLACS context handle                       */
  desc_A[LLD_]  = total[1];  /* LLD : leading dimension of the local array */

#ifdef DEBUG
  printf ("dalib_get_array_info, M = %d, N = %d, MB = %d, NB = %d, LLD = %d, context = %d\n",
           m, n, mb, nb, total[1], context);
#endif

} /* dalib_get_array_info */

/*******************************************************************
*                                                                  *
*  dalib_get_section_info (section_id, m, n, A, ia, ja, desc_A)    *
*                                                                  *
*  DALIB descriptor -> BLACS descriptor translation                *
*                                                                  *
*  -> section_id corresponds to A(ia:ia+m-1,ja:ja+n-1)             *
*  -> desc_A is description vector associated with 2D array A      *
*  -> A itself will be pointer to local data                       *
*                                                                  *
*******************************************************************/

void dalib_get_section_info (section_id, m, n, A, ia, ja, desc_A)
section_info section_id;
int *m, *n, *ia, *ja;
char *A;
int desc_A[];

{ array_info array_id;
  DimInfo    *dims;
  int        rank;

  /* determine the global size m x n of the section and offsets ia, ja */

#ifdef DEBUG
  printf ("get section info for array/section id %d\n", section_id);
#endif

  if (dalib_is_array_info (section_id))
    { array_id = (array_info) section_id;
      dalib_get_global_array_size2 (array_id, m, n, ia, ja);
    }
   else if (dalib_is_section_info (section_id))
    { array_id = section_id->array_id;
      dalib_get_global_section_size2 (section_id, m, n, ia, ja);
    }
   else
    { dalib_internal_error ("get section info : neither array nor section");
      dalib_stop ();
    }

  dalib_get_array_info (array_id, A, desc_A);

} /* dalib_get_section_info */

static void dalib_print_descriptor (name, desc_A)
char name [];
int  desc_A [];

{ printf ("%d: descriptor of %s : M = %d, N = %d, MB = %d, NB = %d, LDD = %d\n",
   pcb.i, name, desc_A[M_], desc_A[N_], desc_A[MB_], desc_A[NB_], desc_A[LLD_]);

} /* dalib_print_descriptor */

/*******************************************************************
*                                                                  *
*  HPF BLACS interface  (EXPORTED routines for FORTRAN)            *
*                                                                  *
*  Hint: there are still some problems to solve here               *
*                                                                  *
*******************************************************************/

void hpf_dgemm_ (transa, transb, alpha, a_id, b_id, beta, c_id)

section_info *a_id, *b_id, *c_id;
float        *alpha, *beta;
char         *transa, *transb;

{ int desc_A [8];
  int desc_B [8];
  int desc_C [8];

  int m, n, k, m1, n1, k1;

  int ia, ib, ic, ja, jb, jc;

  char *A, *B, *C;             /* pointers to array data */

  int iam, nprocs;

#ifdef DEBUG
  blacs_pinfo_ (&iam, &nprocs);
  printf ("calling dgemm on %d of %d\n", iam, nprocs);
#endif

  /* prepare the call to the ScaLAPACK routine */

  dalib_get_section_info (*a_id, &m,  &k,  &A, &ia, &ja, desc_A);
  dalib_get_section_info (*b_id, &k1, &n,  &B, &ib, &jb, desc_B);
  dalib_get_section_info (*c_id, &m1, &n1, &C, &ic, &jc, desc_C);

#ifdef DEBUG
  printf ("A is %d x %d, B is %d x %d, C is %d x %d\n", m, k, k1, n, m1, n1);
#endif

  if ((m != m1) || (n != n1) || (k != k1))

    { dalib_internal_error ("pdgemm: size mismatch");
      printf ("A is %d x %d, B is %d x %d, C is %d x %d\n",
               m, k, k1, n, m1, n1);
      dalib_stop();
    }

  /* other tests might be useful, e.g. desc_A [NB_] == ...
     this will be tested in the call, but error messages are not
     very useful for the user, and in fact there are a lot of restrictions */

  /* call of the ScaLAPACK routine */

#ifdef DEBUG
  printf ("%d: pdgemm (_, _, m=%d, n=%d, k=%d, ...)\n", pcb.i, m, n, k);
  dalib_print_descriptor ("A", desc_A);
  dalib_print_descriptor ("B", desc_B);
  dalib_print_descriptor ("C", desc_C);
#endif

  pdgemm_ (transa, transb, &m, &n, &k, alpha, A, &ia, &ja, desc_A,
           B, &ib, &jb, desc_B, beta, C, &ic, &jc, desc_C);

  /* back distribution of c if necessary, destroy descriptors */

} /* hpf_dgemm */

/*********************************************************************
**********************************************************************
**                                                                  **
**  Some functionality of the tools (for work arrays)               **
**                                                                  **
**********************************************************************
*********************************************************************/

      /****************************************************
      *                                                   *
      *   dalib_numroc                                    *
      *                                                   *
      ****************************************************/

static int dalib_numroc (n, nb, iproc, isrcproc, nprocs)
int n, nb, iproc, isrcproc, nprocs;

{ int mydist, extrablks, numroc, nblocks;

  mydist = (nprocs+iproc-isrcproc) % nprocs;
  nblocks = n / nb;
  numroc  = (nblocks/nprocs) * nb;
  extrablks = nblocks % nprocs;
  if (mydist < extrablks)
     numroc = numroc + nb;
   else if (mydist == extrablks)
     numroc = numroc + n % nb;
  return (numroc);

} /* dalib_numroc */

/*********************************************************************
**********************************************************************
**                                                                  **
**  HPF SCALAPACK interface  (EXPORTED routines for FORTRAN)        **
**                                                                  **
**********************************************************************
*********************************************************************/

/*******************************************************************
*                                                                  *
*   hpf_dgetrf_ (a_id, ipiv_id, info)                              *
*                                                                  *
*   - computes an LU factiorization of a M x N matrix              *
*   - a_id stands for matrix sub( A ) = (IA:IA+M-1,JA:JA+N-1)      *
*                                                                  *
*   - sub ( A )  =  P * L * U                                      *
*                                                                  *
*******************************************************************/
 
void FUNCTION(hpf_dgetrf) (a_data, ipiv,    info, 
                           a_id,   ipiv_id, info_id)
 
section_info *a_id, *ipiv_id, *info_id;

int  ipiv[];   /* integer array in FORTRAN, replicated */
int  *info;    /* scalar integer in FORTRAN            */
char *a_data;
 
{ int m, n, ia, ja;
  int desc_A [8];
 
  char *A;

#ifdef DEBUG
  printf ("call of hpf_dgetrf_\n");
#endif
 
  dalib_get_section_info (*a_id, &m, &n, &A, &ia, &ja, desc_A);
 
#ifdef DEBUG
  printf ("%d: pdgetrf (m=%d, n=%d, _, ia = %d, ja = %d, ...)\n",
          pcb.i, m, n, ia, ja);
  dalib_print_descriptor ("A", desc_A);
#endif
 
  pdgetrf_ (&m, &n, A, &ia, &ja, desc_A, ipiv, info);
 
} /* hpf_dgetrf_ */

/****************************************************************************
*                                                                           *
*  hpf_dgetri_ (a_id, ipiv_id, info)                                        *
*                                                                           *
*  PDGETRI computes the inverse of a distributed matrix using the LU        *
*  factorization computed by PDGETRF. This method inverts U and then        *
*  computes the inverse of sub( A ) = A(IA:IA+N-1,JA:JA+N-1) denoted InvA   *
*  by solving the system InvA*L = inv(U) for InvA.                          *
*                                                                           *
****************************************************************************/

void FUNCTION(hpf_dgetri) (a_data, ipiv, info, a_id, ipiv_id, info_id)

section_info *a_id, *ipiv_id, *info_id;

int ipiv[];
int *info;
char *a_data;

{ int m, n, ia, ja;
  int desc_A [8];
  char *A;

  int *iwork;
  double *work;
  int liwork, lwork;   /* length */

  int nprow, npcol, myrow, mycol;

  dalib_get_section_info (*a_id, &m, &n, &A, &ia, &ja, desc_A);

  /* assert : m = n */

#ifdef DEBUG
  printf ("%d: pdgetri (n=%d, _, ia = %d, ja = %d, ...)\n", 
          pcb.i, n, ia, ja);
  dalib_print_descriptor ("A", desc_A);
#endif

  blacs_gridinfo_ (desc_A+CTXT_, &nprow, &npcol, &myrow, &mycol);

  /* LWORK = LOCp(N+MOD(IA-1,MB_A))*NB_A */

  lwork = n + (ia - 1) % desc_A[MB_];
  lwork = dalib_numroc (lwork, desc_A[MB_], myrow, desc_A[RSRC_], nprow);
  lwork = lwork * desc_A[NB_];

  /* LIWORK =  LOCq( M_A + MOD(IA-1, MB_A) ) + MB_A */

  liwork = m + (ia - 1)%desc_A[MB_];
  liwork = dalib_numroc (liwork, desc_A[NB_], mycol, desc_A[CSRC_], npcol);
  liwork = liwork + desc_A[MB_];

  /* printf ("work has size %d, iwork has size %d\n", lwork, liwork); */

  work   = (double*) dalib_malloc (lwork * sizeof (double), "hpf_dgetri");
  iwork  = (int*)    dalib_malloc (liwork * sizeof (int),   "hpf_dgetri");

  pdgetri_ (&n, A, &ia, &ja, desc_A, ipiv, work, &lwork, iwork, &liwork, info);

  free (iwork);
  free (work);

  /* printf ("on return, info = %d, iwork = %d\n",  *info, iwork[0]); */

} /* hpf_dgetri_ */

/************************************************************************
*                                                                       *
*  hpf_dgetrs_ (trans, a_id, ipiv, b_id, info)                          *
*                                                                       *
*  - solves op( sub( A ) ) * X = sub( B )                               *
*                                                                       *
*  sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), op( A ) = A or A**T and     *
*  sub( B ) denotes B(IB:IB+N-1,JB:JB+NRHS-1).                          *
*                                                                       *
************************************************************************/

void FUNCTION(hpf_dgetrs) (trans,    a_data, ipiv,    b_data, info,
                           trans_id, a_id,   ipiv_id, b_id,   info_id)
 
section_info *a_id, *b_id, *trans_id, *ipiv_id, *info_id;
int ipiv[];
int *info;
char *a_data, *b_data, *trans;
 
{ int m, n, n1, nrhs;
  int ia, ja, ib, jb;
  int desc_A [8], desc_B[8];

  char *A, *B;

  dalib_get_section_info (*a_id, &m, &n, &A, &ia, &ja, desc_A);
  dalib_get_section_info (*b_id, &n1, &nrhs, &B, &ib, &jb, desc_B);

  if ((m != n) || (n != n1))
    { dalib_internal_error ("hpf_dgetrs: illegal arguments");
      dalib_stop ();
    }
 
#ifdef DEBUG
  printf ("%d: pdgetrs (_, n=%d, nrhs=%d, _, ia=%d,ja=%d,_,_,ib=%d,jb=%d,..)\n",
          pcb.i, n, nrhs, ia, ja, ib, jb);
  dalib_print_descriptor ("A", desc_A);
  dalib_print_descriptor ("B", desc_B);
#endif

  pdgetrs_ (trans, &n, &nrhs, A, &ia, &ja, desc_A, ipiv, 
                              B, &ib, &jb, desc_B, info);

} /* FUNCTION(hpf_dgetrs) */

/************************************************************************
*                                                                       *
*  hpf_dpotrf_ (uplo, a_id, info)                                       *
*                                                                       *
*  PDPOTRF computes the Cholesky factorization of an N-by-N real        *
*  symmetric positive definite distributed matrix sub( A ) denoting     *
*  A(IA:IA+N-1, JA:JA+N-1).                                             *
*  The factorization has the form                                       *
*                                                                       *
*            sub( A ) = U' * U ,  if UPLO = 'U', or                     *
*            sub( A ) = L  * L',  if UPLO = 'L',                        *
*                                                                       *
*  where U is an upper triangular matrix and L is lower triangular.     *
*                                                                       *
************************************************************************/

void FUNCTION(hpf_dpotrf) (uplo,    a_data, info,
                           uplo_id, a_id,   info_id)
 
char *uplo, *a_data;
section_info *a_id, *uplo_id, *info_id;
int *info;
 
{ int m, n;
  int ia, ja;
  int desc_A [8];

  char *A;

  dalib_get_section_info (*a_id, &m, &n, &A, &ia, &ja, desc_A);

  /* m must be equal to n */

#ifdef DEBUG
  printf ("%d: pdpotrf (_, n=%d, _, ia=%d,ja=%d,..)\n",
          pcb.i, n, ia, ja);
  dalib_print_descriptor ("A", desc_A);
#endif

  pdpotrf_ (uplo, &n, A, &ia, &ja, desc_A, info);

} /* hpf_dpotrf_ */

/************************************************************************
*                                                                       *
*  hpf_dgeqrf_ (a_id, tau, info)                                        *
*                                                                       *
************************************************************************/

void hpf_dgeqrf_ (a_data, tau, info, a_id, tau_id, info_id)
 
section_info *a_id, *tau_id, *info_id;
char *tau, *a_data;
int *info;
 
{ int m, n;
  int ia, ja;
  int desc_A [8];

  char *A;
  double *work;
  int    lwork;
  int    iroff, icoff;
  int    iarow, iacol;
  int    Mp0, Nq0;
  int    nprow, npcol, myrow, mycol;

  dalib_get_section_info (*a_id, &m, &n, &A, &ia, &ja, desc_A);

  blacs_gridinfo_ (desc_A+CTXT_, &nprow, &npcol, &myrow, &mycol);
 
  /*   LWORK >= NB_A * ( Mp0 + Nq0 + NB_A ), where

       IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ),
       IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
       IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
       Mp0   = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ),
       Nq0   = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL )     */

  iroff = (ia -1) % desc_A[MB_];
  icoff = (ja -1) % desc_A[NB_];
  iarow = (desc_A[RSRC_] + (ia -1) / desc_A[MB_]) % nprow;
  iacol = (desc_A[CSRC_] + (ja -1) / desc_A[NB_]) % npcol;
  Mp0 = dalib_numroc (m+iroff, desc_A[MB_], myrow, iarow, nprow);
  Nq0 = dalib_numroc (n+icoff, desc_A[NB_], mycol, iacol, npcol);
  lwork = desc_A [NB_] * (Mp0 + Nq0 + desc_A[NB_]);

  work   = (double*) dalib_malloc (lwork * sizeof (double), "hpf_dgeqrf");

  pdgeqrf_ (&m, &n, A, &ia, &ja, desc_A, tau, work, &lwork, info);

  free (work);

} /* hpf_dgeqrf_ */

