/*  file: ecm2d.c
*         changed by Alex Stuebinger
*         for ANSIONLY compilers
*
*   date: 08/04/98 15:26
*/




/* GMP-ECM Version 2d.
   Author: Paul Zimmermann, January 1998, zimmerma@loria.fr
   with many contributions from T. Granlund (TMG Datakonsult)
   and Paul Leyland.
   See http://www.loria.fr/~zimmerma/records/ecmnet.html

Changes with respect to 2a:
- use base-2 division for factors of 2^k+/-1 (BASE2 macro, default 1)
- added division using pre-inversion (macro PREINVERT): no saving
- now prints a warning for probable prime input
- now checks if factors found are composite or not
- now checks for prime powers
Changes with respect to 2b:
- saved a factor of two in step 2 initialization, and a factor of two
  in memory needed by step 2
- changed B2 and m in step 2 to be double's --> no overflow any more,
  even on 32-bit machines.
- fixed bug for multiple-line input (thanks to Torbjorn and P. Leyland)
Changes with respect to 2c:
- added LARGE macro for large input, like Fermat numbers, to disable
  primality and perfect-power tests

This version uses the Montgomery form (8) from [2], which avoids gcds:

        b*y^2*z = x^3 + a*x^2*z + x*z^2

References:
[1] "Speeding the Pollard and Elliptic Curve Methods of Factorization", by Peter
   Montgomery, Math. of Comp. 48 (177), pages 243-264, January 1987.
[2] "Factorization of the tenth and eleventh Fermat numbers", by Richard Brent,
ftp://nimbus.anu.edu.au/pub/Brent/rpb...

Examples (log and timing lines omitted):

% echo 137703491 | ecm 100 6
********** Factor found during step 1: 17389

% echo 137703491 | ecm 100 13
********** Factor found during step 2: 7919

From [2], page 15 (factorization of 55^126+1):
% echo 5394204444759808120647321820789847518754252780933425517607611172590240019087317088600360602042567541009369753816111824690753627535877960715703346991252857 | ecm 345551 805816989
********** Factor found during step 1: 25233450176615986500234063824208915571213

% ecm 314263 14152267 4677853 < F10.cofactor
Input number is 607820568181834328745927047401406785398975700821911559763928675076909152806525747797078707978021962487854849079350770968904705424125269800765765006449689562590686195386366153585734177565092347016126765195631310982002631912943551551593959032889971392442015624176361633631364310142874363629569
********** Factor found during step 2: 4659775785220018543264560743076778192897

# first Cunningham factor found by GMP-ECM (06 Dec 1997)
% echo 449590253344339769860648131841615148645295989319968106906219761704350259884936939123964073775456979170209297434164627098624602597663490109944575251386017 | ecm 1000000 63844855
********** Factor found during step 2: 241421225374647262615077397

# p48 found by Richard Brent on October 9, 1997
% echo 3923385745693995079670229419275984584311007321932374190635656246740175165573932140787529348954892963218868359081838772941945556717 | ecm 141667 876329474 150814537
********** Factor found during step 2: 662926550178509475639682769961460088456141816377

# p45 found by Richard Brent on October 24, 1997
% echo 89101594496537524661600025466303491594098940711325290746374420963129505171895306244425914080753573576861992127359576789001 | ecm 325001 877655087 1032299
********** Factor found during step 2: 122213491239590733375594767461662771175707001

# p40 of p(14561) found by PZ with ecmfft
% echo 5735013127104523546495917836490637235369 | ecm 329941 3848610099745584498259560038340842096471 4043946241 2527419713481530878734189429997880136878
*/

#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#ifndef ANSIONLY
#include <sys/times.h>
#include <sys/time.h>
#include <unistd.h>
#endif

#include "gmp.h"

#ifdef DEBUG2
#define DEBUG
#endif

#ifndef max
/* Two-argument maximum and minimum.  The whole expansion is parenthesized
   so that the macros can be embedded in a larger expression, e.g.
   2*max(a,b).  Each argument may be evaluated twice, so do not pass
   expressions with side effects. */
#define max(a,b) (((a)>(b)) ? (a) : (b))
#define min(a,b) (((a)<(b)) ? (a) : (b))
#endif

#ifdef ANSIONLY
/* Replacement for the non-ANSI random number source: combines two rand()
   values into one non-negative int of up to 31 bits.
   The mixing is done in unsigned arithmetic, because rand()<<16 overflows
   a signed int as soon as RAND_MAX exceeds 32767, and the two rand() calls
   are sequenced explicitly so the result does not depend on the compiler's
   evaluation order.  Assumes int has at least 32 bits. */
int random(void)
{
  unsigned long lo, hi;

  lo = (unsigned long) rand();
  hi = (unsigned long) rand();
  return (int) ((lo ^ (hi << 16)) & 0x7fffffffUL);
}
#endif

#define BASE2 1 /* special base 2 optimization */

/* ANSI Prototypes */
extern void printout(__mpz_struct *,__mpz_struct *);
extern int ecm(__mpz_struct *,__mpz_struct *,int ,double );
extern int step1(__mpz_struct *);
extern void initprimes(double ,int );
extern int treat(__mpz_struct *,unsigned int );
extern void multiply(unsigned int );
extern void add(__mpz_struct *,__mpz_struct *,__mpz_struct *,__mpz_struct *);
extern void duplicate(__mpz_struct *,__mpz_struct *,__mpz_struct *,__mpz_struct *);
extern int step2(__mpz_struct *,double );
extern int cputime(void);
extern int isbase2(__mpz_struct *);
extern void mod2plus(__mpz_struct *,__mpz_struct *,__mpz_struct *);
extern void mod2minus(__mpz_struct *,__mpz_struct *,__mpz_struct *);
extern void compare2(__mpz_struct *);
extern void mod(__mpz_struct *,__mpz_struct *,__mpz_struct *);
extern void mpz_mod2(__mpz_struct *,__mpz_struct *,__mpz_struct *);
extern void preinvert(__mpz_struct *);
extern void mpz_swap(mpz_ptr ,mpz_ptr );
extern int mpz_root(mpz_ptr ,mpz_srcptr ,unsigned long );
extern int mpz_perfect_power_p(mpz_srcptr );



/* global variables:
   B1       - step 1 bound, parsed from argv[1] in main()
   bb       - bound of the prime table: initprimes() has tabulated the primes <= bb
   prime    - table of those primes, stored in prime[1..nbprimes]
   nbprimes - number of entries in prime[]
   mul      - running count of modular multiplications, reset by ecm() for each step
   lgn      - bit count computed by preinvert() and used by mpz_mod2()
   ispower2 - result of isbase2(): +k or -k when n divides 2^k+1 or 2^k-1, else 0 */
unsigned int B1,bb,*prime,nbprimes,mul,lgn; int ispower2;
/* sieve work array holding the characters '0' and '1', allocated by initprimes() */
char *pr;
/* a, b: curve parameters (ecm() leaves (a+2)/4 mod n in b); n: number to factor;
   (x:z): current point; (x1:z1), (x2:z2): working points of multiply() and step2();
   u, v, w, y: scratch values; one: the constant 1; invn: set by preinvert() */
mpz_t a,b,n,u,v,w,x,z,x1,z1,x2,z2,one,y,invn;

/*
struct tms ti;
*/

void initprimes(),multiply(),add(),duplicate(),compare2(),mpz_mod2(),
     preinvert();
#ifdef BASE2
void mod();
#else
#define mod mpz_mod
#endif

/* Usage: ecm B1 [s] [B2] < file
          ecm B1 A B2 x1 < file

   Reads decimal integers from stdin and runs ecm() once on each of them.
   With at most three arguments the curve is derived from the seed s
   (0 or absent: the random generator is seeded from the clock).  With four
   arguments the curve parameter A and the starting abscissa x1 are taken
   from the command line; x is reset to 0 after every input number, so only
   the first number uses them.

   Exit status: 0 iff a factor other than N itself was found for the last
   input number, as suggested by T. Granlund.  This allows:
	while ecm 1000000 <mycomposite; do true; done
	n=1000; while [ $n -gt 0 ]; do ecm 1000000 <mycomposite && break; n=`expr $n - 1`; done
   Input on which no number can be read counts as "no factor found". */
int main(argc,argv) int argc; char *argv[];
{
   int r=0,s;
   int c=EOF; /* int, not char: must hold EOF and be a valid isdigit() argument */
   mpz_t p; double B2;

   if (argc<2) {
     printf("Usage: ecm B1 [sigma] [B2] < file\n");
     printf("       ecm B1 A B2 x1 < file\n");
     exit(1);
   }
   printf("GMP-ECM version 2d, by P. Zimmermann (Inria), February 1998,\n");
   printf("with many contributions from T. Granlund and P. Leyland.\n");
   mpz_init(a); mpz_init(b); mpz_init(n); mpz_init(u); mpz_init(v);
   mpz_init(w); mpz_init(x); mpz_init(x1); mpz_init(z1); mpz_init(x2);
   mpz_init(z2); mpz_init(z);
   mpz_init_set_ui(one,1); mpz_init(p); mpz_init(y); mpz_init(invn);
   B1 = atoi(argv[1]);
   /* initialize table of primes */
   bb=0; initprimes((double)B1,0);
   s = (argc>=3) ? atoi(argv[2]) : 0;
   B2 = (argc>=4) ? atof(argv[3]) : 0.0;
   if (argc>=5) {
     mpz_set_str(a,argv[2],10); mpz_set_str(x,argv[4],10);
   }
   else mpz_set_ui(x,0);
#ifdef ANSIONLY
   if (s==0) {
     time_t timer;
     struct tm tp;
     time(&timer);
     tp = *localtime(&timer);
     srand(tp.tm_hour * 3600 + tp.tm_min * 60 + tp.tm_sec);
   }
#else
   if (s==0) {
     /* the following 3 lines are due to Torbjorn Granlund */
     struct timeval tp;
     gettimeofday(&tp, NULL);
     srand48(65536 * tp.tv_sec + tp.tv_usec + getpid());
   }
#endif
   while (!feof(stdin)) {
     if (mpz_inp_str(n,stdin,10)==0) {
       /* nothing readable (empty input or stray text): do not factor a
          stale or zero n, just report "no factor" for this round */
       r=0;
     }
     else {
#ifndef LARGE
       printf("Input number is "); mpz_out_str(stdout,10,n); printf("\n");
       fflush(stdout);
#endif
       if ((r=ecm(p,n,s,B2))!=0) {
         if (mpz_cmp(p,n)) {
           if (mpz_probab_prime_p(p,25))
             printf("Found probable prime factor: ");
           else printf("Found COMPOSITE factor: ");
           mpz_out_str(stdout,10,p);
         }
         else { printf("Found input number N"); r=0; }
         printf("\n"); fflush(stdout);
       }
     }
     /* skip everything up to the first digit of the next number; stop at
        end of file and also on a read error, where getchar() keeps
        returning EOF without setting the end-of-file indicator */
     while (!feof(stdin) && (c=getchar())!=EOF && !isdigit(c))
       ;
     if (feof(stdin) || ferror(stdin)) exit((r) ? 0 : 1);
     ungetc(c,stdin);
     mpz_set_ui(x,0);
   }
   return(0);
}

/* Writes the projective point (x::z) to stdout in decimal, as "(x : : z)". */
void printout(x,z) mpz_t x,z;
{
   putchar('(');
   mpz_out_str(stdout,10,x);
   fputs(" : : ",stdout);
   mpz_out_str(stdout,10,z);
   putchar(')');
}

/* Runs one elliptic curve on n: sets up the curve and starting point, then
   calls step1() and, if that finds nothing, step2().
   - p  : receives the factor found (may be n itself)
   - n  : number to factor
   - s  : seed sigma for the curve; 0 means draw a random one
   - B2 : step 2 bound, passed on to step2()
   If the global x is non-zero on entry, the globals a and x are used as
   curve parameter and starting abscissa instead of s.
   Returns 0 iff no factor found.
*/
int ecm(p,n,s,B2) mpz_t p,n; int s; double B2;
{
   int st; /* same type as cputime(), so the %d time differences are ints */

   mpz_set_ui(p,6); mpz_gcd(p,n,p);
   if (mpz_cmp(p,one)) return(1);
   /* now gcd(n,6)=1 */
#ifndef LARGE
   if (mpz_probab_prime_p(n,25))
     printf("******* Warning: probable prime input\n");
   else if (mpz_perfect_power_p(n))
     printf("******* Warning: input is a perfect power\n");
#endif
#ifdef PREINVERT
   preinvert(n);
#endif
#ifdef BASE2
   if ((ispower2=isbase2(n))!=0) {
     printf("recognized factor of 2^%d",(ispower2>0) ? ispower2 : -ispower2);
     if (ispower2>0) printf("+"); else printf("-");
     printf("1\n");
     compare2(n);
     fflush(stdout);
   }
#endif
   if (mpz_cmp_ui(x,0)) /* start from given a and x instead of s */
     mpz_set_ui(z,1);
   else {
     /* generates a random starting point using (11) from [2], or take the 's' given */
     if (s) mpz_set_ui(u,s);
     else { /* generate a random sigma */
#ifdef ANSIONLY
       mpz_set_ui(v, random()); mpz_mod(u,v,n);
#else
       mpz_set_ui(v,lrand48()); mpz_mod(u,v,n);
#endif
     }
     printf("Using B1=%u and sigma=",B1); mpz_out_str(stdout,10,u);
     printf("\n"); fflush(stdout);
     mpz_mul_ui(w,u,4); mpz_mod(v,w,n); /* v = (4*s) mod n */
     mpz_mul(x,u,u); mpz_sub_ui(w,x,5); mpz_mod(u,w,n); /* u = (s^2-5) mod n */
     mpz_mul(x,u,u); mpz_mul(w,x,u); mpz_mod(x,w,n); /* x = u^3 mod n */
     mpz_mul(z,v,v); mpz_mul(w,z,v); mpz_mod(z,w,n); /* z = v^3 mod n */
     mpz_mul(b,x,v); mpz_mul_ui(w,b,4); mpz_mod(b,w,n); /* b = (4*x*v) mod n */
     mpz_sub(a,v,u); mpz_mul(w,a,a); mpz_mul(w,w,a); mpz_mod(w,w,n); /* w = (v-u)^3 */
     mpz_mul_ui(a,u,3); mpz_add(a,a,v); mpz_mul(w,w,a); mpz_mod(a,w,n);
     /* a = ((v-u)^3*(3*u+v)) mod n */
     mpz_gcdext(p,u,NULL,b,n); /* p = gcd(b,n) = u*b mod n */
     if (mpz_cmp(p,one)) return(1);
     mpz_mul(a,a,u); mpz_sub_ui(a,a,2); mpz_mod(a,a,n); /* a = a/b-2 mod n */
   }
#ifndef LARGE
   printf("A="); mpz_out_str(stdout,10,a); printf("\n"); fflush(stdout);
#endif
   mpz_add_ui(b,a,2);
   /* divide b by 2 twice modulo n: when b is odd, adding n (odd, since
      gcd(n,6)=1) makes it even first */
   if (mpz_mod_ui(w,b,2)) mpz_add(b,b,n);
   mpz_tdiv_q_2exp(b,b,1); /* b = b/2 */
   if (mpz_mod_ui(w,b,2)) mpz_add(b,b,n);
   mpz_tdiv_q_2exp(b,b,1); /* b = b/2 */
   /* now b = (a+2)/4 mod n */
   /* normalize the starting point to z=1 */
   mpz_gcdext(p,u,NULL,z,n);
   if (mpz_cmp(p,one)) return(1);
   mpz_mul(x,x,u); mpz_mod(x,x,n);
   mpz_set_ui(z,1);
#ifndef LARGE
   printf("starting point: x="); mpz_out_str(stdout,10,x); printf("\n");
   fflush(stdout);
#endif
   /* Step 1 */
   st=cputime(); mul=0;
   if (step1(p)) {
     printf("********** Factor found during step 1: "); mpz_out_str(stdout,10,p);
     printf("\n"); fflush(stdout); return(1);
   }
   printf("Step 1 took %dms for %u multiplications\n",
		      cputime()-st,mul);
   fflush(stdout); mul=0;
#ifdef DEBUG
   printf("Q="); printout(x,z); printf("\n"); fflush(stdout);
#endif
   /* Step 2 */
   st=cputime();
   if (step2(p,B2)) {
     printf("********** Factor found during step 2: "); mpz_out_str(stdout,10,p);
     printf("\n"); fflush(stdout); return(1);
   }
   printf("Step 2 took %dms for %u multiplications\n",
		      cputime()-st,mul);
   fflush(stdout);
   return(0);
}

/* Step 1: calls treat() on every prime <= B1, found by sieving the range
   0..B1 in blocks of bb numbers with the primes prime[1..nbprimes].
   returns 0 iff no factor found, otherwise returns factor in p */
int step1(p) mpz_t p;
{
  unsigned int l,i,j,q,imax,lmax;

  lmax = B1/bb;
  for (l=0;l<=lmax;l++) {
#ifdef DEBUG
    printf("at %u\n",l*bb);
#endif
    /* check range l*bb <= p < (l+1)*bb */
    if (l) { /* sieve primes, pr[i] corresponds to l*bb+i */
      for (i=0;i<bb;i++) pr[i]='1';
      for (j=1;j<=nbprimes;j++) {
	/* delete multiples of prime[j] */
	q=prime[j];
	i=(q-((l*bb)%q)) % q;
	/* but never the prime itself: when bb is prime, block 1 starts at
	   q=bb, and block 0 stops at bb-1, so bb would never be treated */
	if (l*bb+i==q) i+=q;
	for(;i<bb;i+=q) pr[i]='0';
      }
    }
    else { /* first block: the table of primes gives the answer directly */
      for (i=0;i<bb;i++) pr[i]='0';
      for (j=1;j<=nbprimes;j++) pr[prime[j]]='1';
    }
    /* the last block stops at B1 */
    imax = ((B1+1)<(l+1)*bb) ? B1+1-l*bb : bb;
    for (i=0;i<imax;i++)
      if (pr[i]=='1') {
	q=l*bb+i;
#ifdef DEBUG
	printf("dealing with prime %u\n",q);
#endif
        if (treat(p,q)) {
	  printf("last prime is %u\n",q);
	  return(1);
	}
      }
  }
  return(0);
}

/* initializes tables of primes up to max(sqrt(B),b).
   On return bb is that bound, pr[0..bb] holds '1' for primes and '0'
   otherwise, and prime[1..nbprimes] lists the primes <= bb.
   Does nothing when the current tables already reach that far.
   Tables from a previous call are released; the program exits if memory
   cannot be obtained. */
void initprimes(B,b) double B; int b;
{
  int i,j;
  char *sieve; unsigned int *table; unsigned int count;

  i = (int)ceil(sqrt(B+0.5));
  if (i>b) b=i;
  if ((unsigned int)b<=bb) return; /* already done */
#ifdef DEBUG
  printf("B=%f b=%d\n",B,b);
#endif
  sieve = (char*) malloc((size_t)b+1);
  if (sieve==NULL) {
    fprintf(stderr,"initprimes: cannot allocate a sieve of %d entries\n",b+1);
    exit(1);
  }
  /* compute primes up to b */
  sieve[0]='0'; sieve[1]='0'; /* b>=1 here, so both entries exist */
  for (i=2;i<=b;i++) sieve[i]='1';
  /* the bound is tested before each pass, so nothing beyond sieve[b] is
     read even when b<4 */
  for (j=2;j<=b/j;j++)
    if (sieve[j]=='1')
      for (i=j*j;i<=b;i+=j) sieve[i]='0';
  count=0;
  for (i=2;i<=b;i++) if (sieve[i]=='1') count++;
  table = (unsigned int*) malloc(((size_t)count+1)*sizeof *table);
  if (table==NULL) {
    free(sieve);
    fprintf(stderr,"initprimes: cannot allocate a table of %u primes\n",count);
    exit(1);
  }
  for (j=0,i=2;i<=b;i++) if (sieve[i]=='1') table[++j]=i;
#ifdef DEBUG
  printf("%d primes\n",j);
#endif
  /* replace the previous tables (NULL on the first call) */
  free(pr); free(prime);
  pr=sieve; prime=table; nbprimes=count;
  bb=b;
}

/* Replaces P=(x:z) by q*P, where q is the largest power of p not
   exceeding B1, then takes gcd(n,z).
   Unless NONORMALIZE is defined, z is first normalized to 1, which can
   already reveal a factor.
   Returns 0 iff no factor found, otherwise the factor is left in ff.
*/
int treat(ff,p) mpz_t ff; unsigned int p;
{
   unsigned int power,bound;

#ifndef NONORMALIZE
   /* x <- x/z mod n, z <- 1 */
   mpz_gcdext(ff,v,NULL,z,n);
   if (mpz_cmp(ff,one)!=0) return(1);
   mpz_mul(x,x,v);
   mul++;
   mpz_mod(x,x,n);
   mpz_set(z,one);
#endif
#ifdef DEBUG
   printf("start: "); printout(x,z); printf("\n");
#endif
   /* keep multiplying by p while the result stays <= B1 */
   bound = B1/p;
   power = p;
   while (power<=bound) power *= p;
   multiply(power);
#ifdef DEBUG
      printf("finished p=%u\n",p);
#endif
   mpz_gcd(ff,n,z);
   return((mpz_cmp(ff,one)!=0) ? 1 : 0);
}

/* computes qP from P=(x:z) and puts the result in (x:z), for q>=1.
   Uses the binary ladder on the bits of q-1, keeping the pair
   (mP,(m+1)P) in (x1:z1),(x2:z2).  The ladder consumes the bit just below
   the leading one and bit 0 in two separate special steps, so it is only
   valid when q-1 has at least three bits; q=2,3,4 are therefore computed
   directly.  q=0 is not supported and leaves (x:z) unchanged.
   Modifies: x, z, x1, z1, x2, z2 and the scratch variables of add() and
   duplicate(). */
void multiply(q) unsigned int q;
{
   int l; unsigned int r;
#ifdef DEBUG
   int m;
#endif

   if (q<=1) return; /* 1P = P */
   if (q<=4) {
     if (q==3) { /* 3P = 2P + P, the difference being P=(x:z) */
       mpz_set(x2,x); mpz_set(z2,z);
       duplicate(x1,z1,x2,z2);
       add(x2,z2,x1,z1);
       mpz_set(x,x2); mpz_set(z,z2);
     }
     else { /* 2P, or 4P = 2(2P) */
       duplicate(x,z,x,z);
       if (q==4) duplicate(x,z,x,z);
     }
     return;
   }

   /* from here on q>=5; l becomes the index of the bit below the leading one */
   q--;
   for (r=q,l=-1;r!=1;r>>=1) l++;
   /* loop invariant: (x1:z1)=mP, (x2:z2)=(m+1)P */
   /* treat first case separately */
   if (q & (1u<<l)) { /* (P,2P) -> (3P,4P) */
     mpz_set(x1,x); mpz_set(z1,z); /* 1P = (x1:z1) */
     duplicate(x2,z2,x1,z1); /* 2P = (x2:z2) */
     add(x1,z1,x2,z2);
     duplicate(x2,z2,x2,z2);
#ifdef DEBUG
     m = 3;
#endif
   }
   else { /* (P,2P) -> (2P,3P) */
     mpz_set(x2,x); mpz_set(z2,z); /* 1P = (x2:z2) */
     duplicate(x1,z1,x2,z2); /* 2P = (x1:z1) */
#ifdef DEBUG2
printf("after duplicate, x1="); mpz_out_str(stdout,10,x1); printf("\n");
printf("after duplicate, z1="); mpz_out_str(stdout,10,z1); printf("\n");
printf("after duplicate, x2="); mpz_out_str(stdout,10,x2); printf("\n");
printf("after duplicate, z2="); mpz_out_str(stdout,10,z2); printf("\n");
#endif
     add(x2,z2,x1,z1); /* (x2:z2) <- 3P */
#ifdef DEBUG2
printf("after add, x2="); mpz_out_str(stdout,10,x2); printf("\n");
printf("after add, z2="); mpz_out_str(stdout,10,z2); printf("\n");
#endif
#ifdef DEBUG
     m = 2;
#endif
   }
#ifdef DEBUG2
      printf("after one step, %dP=",m); printout(x1,z1); printf(" %dP=",m+1);
      printout(x2,z2);
      printf("\n");
#endif
   l--;
   while (l>=1) {
      if (q & (1u<<l)) { /* (mP,(m+1)P) -> ((2m+1)P,(2m+2)P), i.e. m <- 2*m+1 */
         add(x1,z1,x2,z2); /* (x1:z1) <- (2m+1)P */
         duplicate(x2,z2,x2,z2); /* (x2:z2) <- (2m+2)P */
#ifdef DEBUG
         m = (m<<1)+1; /* m = 2*m+1 */
#endif
      }
      else { /* (mP,(m+1)P) -> (2mP,(2m+1)P) i.e. m <- 2*m */
         add(x2,z2,x1,z1); /* (x2:z2) <- (2m+1)P */
         duplicate(x1,z1,x1,z1); /* (x1:z1) <- 2mP */
#ifdef DEBUG
         m <<= 1; /* m = 2*m */
#endif
      }
#ifdef DEBUG2
      printf("after one step, %dP=",m); printout(x1,z1); printf(" %dP=",m+1);
      printout(x2,z2);
      printf("\n");
#endif
      l--;
   }
   /* bit 0 is treated on its own because only the upper point of the pair
      is needed: this saves an addition when the bit is 1, a duplication
      when it is 0, and two assignments */
   if (q&1) {
#ifdef DEBUG2
printf("before duplicate, x2="); mpz_out_str(stdout,10,x2); printf("\n");
printf("before duplicate, z2="); mpz_out_str(stdout,10,z2); printf("\n");
#endif
     duplicate(x,z,x2,z2); /* original q even */
#ifdef DEBUG2
printf("after duplicate, x="); mpz_out_str(stdout,10,x); printf("\n");
printf("after duplicate, z="); mpz_out_str(stdout,10,z); printf("\n");
#endif
   }
   else { add(x2,z2,x1,z1); mpz_set(x,x2); mpz_set(z,z2); } /* original q odd */
}

/* Differential addition: replaces Q=(x2:z2) by Q+R, where R=(x1:z1) and
   the difference Q-R or R-Q is the global point P=(x:z).
   Cost: 5 mul (6 with NONORMALIZE), 6 add/sub and 5 mod.
   Uses the following global variables:
     - n : number to factor
     - x, z : coordinates of P (z is only read with NONORMALIZE)
     - u, v, w : auxiliary variables
   Modifies: x2, z2, u, v, w, mul.
*/
void add(x2,z2,x1,z1) mpz_t x2,z2,x1,z1;
{
   /* u = ((x2-z2)*(x1+z1)) mod n */
   mpz_sub(u,x2,z2);
   mpz_add(v,x1,z1);
   mpz_mul(w,u,v);
   mod(u,w,n);

   /* v = ((x2+z2)*(x1-z1)) mod n; x2 and z2 are scratch from here on */
   mpz_add(w,x2,z2);
   mpz_sub(x2,x1,z1);
   mpz_mul(z2,w,x2);
   mod(v,z2,n);

   /* x2 = (u+v)^2 mod n, multiplied by z when P is not normalized */
   mpz_add(w,u,v);
   mpz_mul(x2,w,w);
#ifdef NONORMALIZE
   mpz_mul(x2,x2,z); mul++;
#endif
   mod(x2,x2,n);

   /* w = (u-v)^2 mod n */
   mpz_sub(w,u,v);
   mpz_mul(z2,w,w);
#ifdef DEBUG2
printf("z2="); mpz_out_str(stdout,10,z2); printf("\n");
#endif
   mod(w,z2,n);

   /* z2 = (x*w) mod n */
   mpz_mul(u,x,w);
   mod(z2,u,n);

   mul += 5;
}

/* Doubling: sets (x2:z2) to 2P for P=(x1:z1), with 5 mul, 4 add/sub and
   5 mod.  The inputs are read before any output is written, so (x2:z2)
   may be the same variables as (x1:z1).
   Uses the following global variables:
     - n : number to factor
     - b : (a+2)/4 mod n
     - u, v, w : auxiliary variables
   Modifies: x2, z2, u, v, w, mul.
*/
void duplicate(x2,z2,x1,z1) mpz_t x2,z2,x1,z1;
{
   /* u = (x1+z1)^2 mod n */
   mpz_add(u,x1,z1);
   mpz_mul(w,u,u);
   mod(u,w,n);

   /* v = (x1-z1)^2 mod n */
   mpz_sub(v,x1,z1);
   mpz_mul(w,v,v);
   mod(v,w,n);

   /* x2 = (u*v) mod n */
   mpz_mul(w,u,v);
   mod(x2,w,n);

   /* w = u-v = 4*x1*z1 */
   mpz_sub(w,u,v);

   /* u = (v+b*w) mod n, using z2 as scratch */
   mpz_mul(u,b,w);
   mpz_add(z2,u,v);
   mod(u,z2,n);

   /* z2 = (w*u) mod n */
   mpz_mul(v,w,u);
   mod(z2,v,n);

   mul += 5;
}

/* Step 2: improved standard continuation, cf [2] p. 7-8.
   Use the following global variables:
   - n: number to factor
   - B1: bound for step 1
   - x, z: coordinates of Q at the beginning of step 2
   Uses B2 if not zero, otherwise defines it as 100*B1; does nothing when
   a non-zero B2 does not exceed B1.
   Returns 0 iff no factor found, otherwise puts factor in p.
   The table of points and the accumulator g are released on every exit
   path, including those that report a factor.
*/
int step2(p,B2) mpz_t p; double B2;
{
   mpz_t *nQx,g; int i,st,D; double m; unsigned int j,q;
   int ninit=0; /* nQx[0..ninit-1] have been initialized */
   int ginit=0; /* non-zero once g has been initialized */
   int found=0; /* value returned */

   st=cputime();
   /* Q:=[x,z]; */
   if (B2==0.0) /* thanks to Torbjorn for the following few lines */
     B2=100.0*(double)B1;
   else if (B2<=(double)B1) return(0); /* no step 2 */
   D=(int)sqrt(B2/2.0);
   if (D%2) D++; /* ensures m and m-D have same parity in step 2 */
   /* a very small B2 would give D<4: nQx[1] must exist, and the ladder in
      multiply() is only valid for an argument 2D-1 of at least 5 */
   if (D<4) D=4;
   initprimes(B2,2*D);
   printf("start step 2 with B1=%u, B2=%f, D=%d\n",B1,B2,D);
   /* with Q the point computed by Step 1, we precompute 2*d*Q for 1 <= d <= D,
     the abscissa of 2*i*Q is stored in nQx[i] (nQx[0] holds that of Q) */
   nQx = (mpz_t*) malloc((D+1)*sizeof(mpz_t));
   if (nQx==NULL) {
     fprintf(stderr,"step2: cannot allocate a table of %d points\n",D+1);
     exit(1);
   }
   /* normalizes (x,z) */
   mpz_gcdext(p,u,NULL,z,n);
   if (mpz_cmp(p,one)) { found=1; goto done; }
   mpz_mul(w,x,u); mul++; mod(x,w,n); mpz_set(z,one);
#ifndef LARGE
   printf("x="); mpz_out_str(stdout,10,x); printf("\n"); fflush(stdout);
#endif
   mpz_init_set(nQx[0],x); ninit=1; /* save x because it will be changed below */
   mpz_set(x1,x); mpz_set(z1,z);
   mpz_init_set(g,one); ginit=1;
   duplicate(x2,z2,x1,z1); /* compute 2*Q */
   mpz_gcdext(p,u,NULL,z2,n);
   if (mpz_cmp(p,one)) { found=1; goto done; }
   mpz_mul(w,x2,u); mul++; mod(x2,w,n);
   mpz_init_set(nQx[1],x2); ninit=2;
   mpz_set(x,x2); mpz_set(z,one); /* add() below needs the difference 2Q in (x:z) */
   for (i=2;i<=D;i++) { /* compute 2*i*Q */
      if (i%2==0) { /* 2*i*Q = 2*(i*Q) */
         duplicate(x2,z2,nQx[i>>1],one);
      }
      else { /* 2*i*Q = (i+1)*Q+(i-1)*Q */
         mpz_set(x2,nQx[(i+1)>>1]); mpz_set(z2,one);
         add(x2,z2,nQx[(i-1)>>1],one);
      }
      /* normalizes z2 to 1 to save one multiplication below */
      mpz_gcdext(p,u,NULL,z2,n);
      if (mpz_cmp(p,one)) { found=1; goto done; }
      mpz_mul(w,x2,u); mul++;
      mpz_init(nQx[i]); ninit=i+1;
      mod(nQx[i],w,n);
   }
   /* now we compute independently (2D-1)Q */
   mpz_set(x,x1); mpz_set(z,z1);
   multiply(2*D-1); /* puts (2D-1)Q in x */
   mpz_gcdext(p,u,NULL,z,n);
   if (mpz_cmp(p,one)) { found=1; goto done; }
   mpz_mul(w,x,u); mul++; mod(x,w,n); mpz_set(z,one);
   /* then we compute mQ for m=1, 2*D+1, 4*D+1, ... */
   /* loop invariant: (m-2D)Q=(x1:z1) (m-4*D)Q=(x:z=1) */
   mpz_set(x1,nQx[0]); mpz_set(z1,one);
   printf("initialization of Step 2 took %dms\n",cputime()-st);
   for (m=1.0;m<B2+(double)D;m+=2.0*(double)D) {
#ifdef DEBUG
      printf("m=%f\n",m);
#endif
      if (m!=1.0) { /* compute m*Q = (m-2D)Q + 2DQ */
         mpz_set(x2,x1); mpz_set(z2,z1);
         add(x1,z1,nQx[D],one);
         /* normalizes z1 to 1 to save one multiplication below
	    and because needed in add */
         mpz_gcdext(p,u,NULL,z1,n);
         if (mpz_cmp(p,one)) { found=1; goto done; }
         mpz_mul(w,x1,u); mul++; mod(x1,w,n); mpz_set(z1,one);
         mpz_set(x,x2); mpz_set(z,z2);
      }
      /* now (x1:z1) = m*Q */
      if (m+(double)D>(double)B1) {
	 /* sieve primes in m-D..m+D */
         if (m==1.0) {
	   /* NOTE(review): this branch marks index prime-1, whereas the
	      loops below read index i as the number m-D+i -- confirm that
	      this is what is intended for the first interval */
	   for (i=2*((B1-1)/2);i<=2*D;i+=2) pr[i]='0';
	   for (j=2;j<=nbprimes;j++) pr[prime[j]-1]='1';
	 }
         else { /* pr[i]=1 iff m-D+i is prime */
	   for (i=2;i<=2*D;i+=2) pr[i]='1';
	   for (j=2;j<=nbprimes;j++) {
	     /* delete multiples of prime[j] */
	     q=prime[j];
	     /* i=(q-((m-D)%q)) % q; */
	     i=(q+D-(int)(0.5+fmod(m,(double)q))) % q;
	     for(;i<=2*D;i+=q) pr[i]='0';
	   }
	 }
#ifdef DEBUG2
	 printf("x1="); mpz_out_str(stdout,10,x1); printf("\n");
#endif
         /* accumulate in g the product of the differences
            x(mQ)-x(dQ), with d the distance from m to each marked number */
         for (i=0;i<D;i+=2)
            if (pr[i]=='1') {
#ifdef DEBUG
	      printf("dealing with prime %f\n",m-(double)D+(double)i);
#endif
	      mpz_sub(w,x1,nQx[(D-i)>>1]); mpz_mul(u,g,w); mul++; mod(g,u,n);
	    }
         /* upper half: skip entries whose mirror image 2D-i was marked,
            since both use the same table entry */
         for (i=D+2;i<=2*D;i+=2)
            if (pr[i]=='1' && pr[2*D-i]=='0') {
#ifdef DEBUG
	      printf("dealing with prime %f\n",m-(double)D+(double)i);
#endif
	      mpz_sub(w,x1,nQx[(i-D)>>1]); mpz_mul(u,g,w); mul++; mod(g,u,n);
	    }
         mpz_gcd(p,g,n);
         if (mpz_cmp(p,one)) {
	   printf("last interval is %f..%f\n",m-(double)D,m+(double)D);
	   found=1; goto done;
	 }
      }
   }
done:
   if (ginit) mpz_clear(g); /* thanks to Paul Leyland */
   for (i=0;i<ninit;i++) mpz_clear(nQx[i]);
   free(nQx);
   return(found);
}

/* Return user CPU time measured in milliseconds. Thanks to Torbjorn.
   Returns 0 when the time cannot be obtained. */
#if defined (ANSIONLY) || defined (USG) || defined (__SVR4) || defined (_UNICOS) || defined(HPUX)
#include <time.h>

int
cputime ()
{
  clock_t ticks;

  ticks = clock ();
  if (ticks == (clock_t) -1)
    return 0;
  /* convert in floating point: ticks * 1000 can overflow a 32-bit clock_t */
  return (int) ((double) ticks * 1000.0 / (double) CLOCKS_PER_SEC);
}
#else
#include <sys/types.h>
#include <sys/resource.h>

int
cputime ()
{
  struct rusage rus;

  if (getrusage (RUSAGE_SELF, &rus) != 0)
    return 0; /* rus would be uninitialized */
  return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
}
#endif

#ifdef BASE2
/* Looks for an exponent k, with 2^k ranging from the leading power of two
   of n up to its square, such that n divides 2^k-1 or 2^k+1.
   Returns -k in the first case, +k in the second, 0 when there is none.
   For each k the minus case is tried first.  Uses the globals u, v, w. */
int isbase2(n) mpz_t n;
{
  unsigned int e,first,last;

  first=mpz_sizeinbase(n,2)-1; /* from Torbjorn */
  last=2*first;
  mpz_set_ui(u,1);
  mpz_mul_2exp(u,u,first); /* u = 2^first */
  e=first;
  while (e<=last) {
    /* does n divide 2^e-1 ? */
    mpz_sub_ui(v,u,1);
    mpz_mod(w,v,n);
    if (mpz_cmp_ui(w,0)==0) return(-(int)e);
    /* does n divide 2^e+1 ? */
    mpz_add_ui(v,u,1);
    mpz_mod(w,v,n);
    if (mpz_cmp_ui(w,0)==0) return((int)e);
    mpz_mul_2exp(u,u,1); /* u = 2^(e+1) */
    e++;
  }
  return(0);
}

/* a <- b mod n for n dividing N = 2^k + 1, where k = ispower2 > 0.
   Since 2^k = -1 mod N, b = hi*2^k + lo is congruent to lo - hi.
   a may be identical to b: the low part is saved in y before a is written. */
void mod2plus(a,b,n) mpz_t a,b,n;
{
    unsigned long int k;

    k = ispower2;
    mpz_tdiv_r_2exp(y,b,k); /* y = low part */
    mpz_tdiv_q_2exp(a,b,k); /* a = high part */
    mpz_sub(a,y,a);
    mpz_mod(a,a,n);
}

/* a <- b mod n for n dividing N = 2^k - 1, where k = -ispower2 > 0.
   Since 2^k = 1 mod N, b = hi*2^k + lo is congruent to lo + hi.
   a may be identical to b: the low part is saved in y before a is written. */
void mod2minus(a,b,n) mpz_t a,b,n;
{
    unsigned long int k;

    k = -ispower2;
    mpz_tdiv_r_2exp(y,b,k); /* y = low part */
    mpz_tdiv_q_2exp(a,b,k); /* a = high part */
    mpz_add(a,y,a);
    mpz_mod(a,a,n);
}

/* compares timings for generic division to base-2 division on a random
   product modulo n, and keeps the special division (ispower2 unchanged)
   unless the generic one was measured strictly faster, in which case
   ispower2 is reset to 0.  Uses the globals u, v, w. */
void compare2(n) mpz_t n;
{
  unsigned int st,i,l,st2,m; int isp;

  isp=ispower2; l=(unsigned int) mpz_size(n);
  mpz_random(u,l); mpz_mod(u,u,n);
  mpz_random(v,l); mpz_mod(v,v,n);
  mpz_mul(w,u,v);
  /* generic division: mod() tests ispower2, so clear it temporarily */
  ispower2=0; m=1000000/l/l+1;
  st=cputime();
  for (i=0;i<m;i++) mod(u,w,n);
  st=cputime()-st;
  /* special division */
  ispower2=isp;
  st2=cputime();
  for (i=0;i<m;i++) mod(u,w,n);
  st2=cputime()-st2;
  if (st<st2) {
    ispower2=0; /* use generic division */
  }
  else if (st2==0) {
    /* too fast to measure: there is no meaningful ratio to print */
    printf("Using special base-2 division\n");
  }
  else {
    printf("Using special base-2 division (%f faster)\n",(double)st/st2);
  }
}

/* a <- b mod n, dispatching on ispower2: the special reductions when n
   was recognized as a factor of 2^k+1 (ispower2>0) or 2^k-1 (ispower2<0),
   the generic one (pre-inverted if PREINVERT is defined) otherwise.
   WARNING: a may be identical to b */
void mod(a,b,n) mpz_t a,b,n;
{
  if (ispower2>0) {
    mod2plus(a,b,n);
    return;
  }
  if (ispower2<0) {
    mod2minus(a,b,n);
    return;
  }
#ifdef PREINVERT
  mpz_mod2(a,b,n);
#else
  mpz_mod(a,b,n);
#endif
}
#endif

/* a <- b mod n, using the pre-inverse invn and the bit count lgn set up
   by preinvert(): a quotient estimate is formed with two shifts and one
   multiplication by invn, then n is subtracted until a < n.
   Uses the global y as scratch.
Warning: a and b may be identical */
void mpz_mod2(a,b,n) mpz_t a,b,n;
{
   /* y = ((b >> lgn) * invn) >> lgn, the quotient estimate */
   mpz_tdiv_q_2exp(y,b,lgn);
   mpz_mul(y,y,invn);
   mpz_tdiv_q_2exp(y,y,lgn);
   /* a = b - y*n */
   mpz_mul(y,y,n);
   mpz_sub(a,b,y);
   /* final correction */
   for (;;) {
     if (mpz_cmp(a,n)<0) break;
     mpz_sub(a,a,n);
   }
}

/* Prepares mpz_mod2() for the modulus n: determines lgn, prints the
   bracketing powers of two of n, and sets invn = floor(2^(2*lgn)/n) with
   the final (incremented) lgn.  Uses the global u as scratch. */
void preinvert(n) mpz_t n;
{
   /* start from the number of bits in the limbs of n ... */
   lgn=8*sizeof(mp_limb_t)*mpz_size(n);
   mpz_set_ui(u,1);
   mpz_mul_2exp(u,u,lgn);
   /* ... and halve u = 2^lgn until it no longer exceeds n */
   while (mpz_cmp(n,u)<0) {
     mpz_tdiv_q_2exp(u,u,1);
     lgn--;
   }
   printf("2^%u < N < 2^%u\n",lgn,lgn+1);
   lgn++;
   mpz_set_ui(invn,1);
   mpz_mul_2exp(invn,invn,2*lgn);
   mpz_fdiv_q(invn,invn,n);
}

/* all the following code is from Torbjorn */
#ifndef GMP21
/* Exchanges the values of u and v by swapping their allocation count,
   size field and limb pointer; no limb data is copied. */
void
mpz_swap (u, v)
     mpz_ptr u;
     mpz_ptr v;
{
  mp_size_t held;
  mp_ptr limbs;

  held = u->_mp_alloc;
  u->_mp_alloc = v->_mp_alloc;
  v->_mp_alloc = held;

  held = u->_mp_size;
  u->_mp_size = v->_mp_size;
  v->_mp_size = held;

  limbs = u->_mp_d;
  u->_mp_d = v->_mp_d;
  v->_mp_d = limbs;
}
#endif

/* Naive implementation of nth root extraction.  It would probably be a
   better idea to use a division-free Newton iteration.  It is insane
   to use full precision from iteration 1.  The mpz_scan1 trick compensates
   to some extent, but is nothing I am proud of.

   Computes the integer part of the nth root of c and stores it in r
   unless r is NULL.  Returns non-zero iff the root is exact, i.e. its
   nth power equals c.  Aborts if the initial power-of-two bracket of the
   root turns out to be wrong.

   The debugging code is selected with #ifdef DEBUG like everywhere else
   in this file: DEBUG may be defined with an empty value, for which
   "#if DEBUG" does not compile.  */

int mpz_root (r, c, nth)
mpz_ptr r; mpz_srcptr c; unsigned long int nth;
{
  mpz_t x, t0, t1, t2, t3;
  unsigned long int nbits;
  int exact;
  int i;
  unsigned long int lowz;
#ifdef DEBUG
  int itercnt;
#endif

  mpz_init (x);
  mpz_init (t0);
  mpz_init (t1);
  mpz_init (t2);
  mpz_init (t3);

  /* first approximation: x = 2^nbits with x^nth <= c < (2x)^nth */
  nbits = mpz_sizeinbase (c, 2);
  mpz_set_ui (x, 1);
  nbits = (nbits - 1) / nth;
  mpz_mul_2exp (x, x, nbits);

  mpz_pow_ui (t1, x, nth);
  if (mpz_cmp (c, t1) < 0)
    abort ();
  mpz_mul_2exp (t2, x, 1);
  mpz_pow_ui (t1, t2, nth);
  if (mpz_cmp (c, t1) >= 0)
    abort ();

  /* Make the approximation better: try to set a few more leading bits,
     keeping each one only if the nth power does not exceed c.  */
  for (i = 1; (nth >> i) != 0; i++)
    {
      if (nbits < i)
	break;

      mpz_setbit (x, nbits - i);
      mpz_tdiv_q_2exp (t0, x, nbits - i);
      mpz_pow_ui (t1, t0, nth);
      mpz_mul_2exp (t1, t1, (nbits - i) * nth);
      if (mpz_cmp (c, t1) < 0)
	mpz_clrbit (x, nbits - i);
    }
  if (nbits >= i)
    mpz_setbit (x, nbits - i);

#ifdef DEBUG
  itercnt = 0;
#endif
  /* Newton iteration x <- x - (x - c/x^(nth-1))/nth, until the correction
     truncates to zero.  Powers are taken of x with its trailing zero bits
     removed and shifted back afterwards (the mpz_scan1 trick).  */
  do
    {
#ifdef DEBUG
      itercnt++;
#endif
      lowz = mpz_scan1 (x, 0);
      mpz_tdiv_q_2exp (t0, x, lowz);
      mpz_pow_ui (t1, t0, nth - 1);
      mpz_mul_2exp (t1, t1, lowz * (nth - 1));
      mpz_tdiv_q (t2, c, t1);
      mpz_sub (t2, x, t2);
      mpz_tdiv_q_ui (t3, t2, nth);
      mpz_sub (x, x, t3);
    }
  while (mpz_sgn (t3) != 0);

#ifdef DEBUG
  {
    static char *ext[] = {"th","st","nd","rd","th","th","th","th","th","th"};
    printf ("Computed %lu%s root of a %ld limb number in %d iterations\n",
	    nth, ext[(nth - 10) % 100 < 10 ? 0 : nth % 10],
	    (long) mpz_size (c), itercnt);
  }
#endif

  /* t1 = x^nth; step x down by one if that overshoots c */
  lowz = mpz_scan1 (x, 0);
  mpz_tdiv_q_2exp (t0, x, lowz);
  mpz_pow_ui (t1, t0, nth);
  mpz_mul_2exp (t1, t1, lowz * nth);
  if (mpz_cmp (c, t1) < 0)
    {
      mpz_sub_ui (x, x, 1);
      lowz = mpz_scan1 (x, 0);
      mpz_tdiv_q_2exp (t0, x, lowz);
      mpz_pow_ui (t1, t0, nth);
      mpz_mul_2exp (t1, t1, lowz * nth);
    }

  exact = mpz_cmp (t1, c) == 0;

  if (r != NULL)
    mpz_set (r, x);

  mpz_clear (t3);
  mpz_clear (t2);
  mpz_clear (t1);
  mpz_clear (t0);
  mpz_clear (x);

  return exact;
}

/*
  Suppose we are to determine if c is a perfect power, c = a ^ b.
  Assume c is divisible by 2^n and that codd = c/2^n is odd.
  Assume a is divisible by 2^m and that aodd = a/2^m is odd.
  It is always true that m divides n.

  * If n is prime, either 1) a is 2*aodd and b = n
		       or 2) a = c and b = 1.
    So for n prime, we readily have a solution.
  * If n is factorable into the non-trivial factors p1,p2,...
    Since m divides n, m has a subset of n's factors and b = n / m.

    BUG: Should handle negative numbers, since they can be odd perfect powers.
*/

/* This is a naive approach to determining perfect powers.
   But it is at least a start that can be developed into something
   for GMP 2.1.  */

static unsigned long int gcd ();
static int isprime ();

/* Table of the primes below 1000, terminated by a 0 entry.
   mpz_perfect_power_p() divides its argument by the entries from index 1
   on (the factor 2 is handled separately there). */
static unsigned short primes[] =
{  2,  3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,
  59, 61, 67, 71, 73, 79, 83, 89, 97,101,103,107,109,113,127,131,
 137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,
 227,229,233,239,241,251,257,263,269,271,277,281,283,293,307,311,
 313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,409,
 419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,
 509,521,523,541,547,557,563,569,571,577,587,593,599,601,607,613,
 617,619,631,641,643,647,653,659,661,673,677,683,691,701,709,719,
 727,733,739,743,751,757,761,769,773,787,797,809,811,821,823,827,
 829,839,853,857,859,863,877,881,883,887,907,911,919,929,937,941,
 947,953,967,971,977,983,991,997,0
};
/* first prime that is not in the table above; mpz_perfect_power_p() stops
   extracting roots once a root falls below it */
#define SMALLEST_OMITTED_PRIME 1009


int mpz_perfect_power_p (u) mpz_srcptr u;
{
  unsigned long int prime;
  unsigned long int n, n2;
  int i;
  unsigned long int rem;
  mpz_t u2, q;
  int exact;

  if (mpz_cmp_ui (u, 1) <= 0)
    return 0;

  n2 = mpz_scan1 (u, 0);
  if (n2 == 1)
    return 0;

  mpz_init (q);
  mpz_init (u2);

  mpz_tdiv_q_2exp (u2, u, n2);

  if (isprime (n2))
    goto n2prime;

  for (i = 1; primes[i] != 0; i++)
    {
      prime = primes[i];
      rem = mpz_fdiv_ui (u2, prime);
      if (rem == 0)		/* divisable? */
	{
	  rem = mpz_fdiv_q_ui (q, u2, prime * prime);
	  if (rem != 0)
	    {
	      mpz_clear (q);
	      mpz_clear (u2);
	      return 0;
	    }
	  mpz_swap (q, u2);
	  for (n = 2;;)
	    {
	      rem = mpz_fdiv_q_ui (q, u2, prime);
	      if (rem != 0)
		break;
	      mpz_swap (q, u2);
	      n++;
	    }

	  n2 = gcd (n2, n);
	  if (n2 == 1)
	    {
	      mpz_clear (q);
	      mpz_clear (u2);
	      return 0;
	    }

	  /* As soon as n2 becomes a prime number, stop factoring.
	     Either we have u=x^n2 or u is not a perfect power.  */
	  if (isprime (n2))
	    goto n2prime;
	}
    }

  if (mpz_cmp_ui (u2, 1) == 0)
    {
      mpz_clear (q);
      mpz_clear (u2);
      return 1;
    }

  if (n2 == 0)
    {
      unsigned long int nth;
      /* We did not find any factors above.  We have to consider all values
	 of n.  */
      for (nth = 2;; nth++)
	{
	  if (! isprime (nth))
	    continue;
#if 0
	  exact = mpz_padic_root (q, u2, nth, PTH);
	  if (exact)
#endif
	    exact = mpz_root (q, u2, nth);
	  if (exact)
	    {
	      mpz_clear (q);
	      mpz_clear (u2);
	      return 1;
	    }
	  if (mpz_cmp_ui (q, SMALLEST_OMITTED_PRIME) < 0)
	    {
	      mpz_clear (q);
	      mpz_clear (u2);
	      return 0;
	    }
	}
    }
  else
    {
      unsigned long int nth;
      /* We found some factors above.  We just need to consider values of n
	 that divides n2.  */
      for (nth = 2; nth <= n2; nth++)
	{
	  if (! isprime (nth))
	    continue;
	  if (n2 % nth != 0)
	    continue;
#if 0
	  exact = mpz_padic_root (q, u2, nth, PTH);
	  if (exact)
#endif
	    exact = mpz_root (q, u2, nth);
	  if (exact)
	    {
	      mpz_clear (q);
	      mpz_clear (u2);
	      return 1;
	    }
	  if (mpz_cmp_ui (q, SMALLEST_OMITTED_PRIME) < 0)
	    {
	      mpz_clear (q);
	      mpz_clear (u2);
	      return 0;
	    }
	}

      mpz_clear (q);
      mpz_clear (u2);
      return 0;
    }


n2prime:
  exact = mpz_root (NULL, u2, n2);
  mpz_clear (q);
  mpz_clear (u2);
  return exact;
}

/* Greatest common divisor of a and b by the binary algorithm:
   gcd(0,b) = b and gcd(a,0) = a; otherwise both operands are made odd,
   the odd parts are reduced by subtract-and-shift until they agree, and
   the common power of two is restored at the end.  */
static unsigned long int gcd (a, b)
unsigned long int a, b;
{
  int twos_a = 0, twos_b = 0;

  if (a == 0)
    return b;
  if (b == 0)
    return a;

  /* strip and count the factors of two */
  while ((a & 1) == 0)
    {
      a >>= 1;
      twos_a++;
    }
  while ((b & 1) == 0)
    {
      b >>= 1;
      twos_b++;
    }

  /* both odd from here on: the difference is even and non-zero */
  for (;;)
    {
      if (a == b)
	break;
      if (a > b)
	{
	  a -= b;
	  do
	    a >>= 1;
	  while ((a & 1) == 0);
	}
      else
	{
	  b -= a;
	  do
	    b >>= 1;
	  while ((b & 1) == 0);
	}
    }

  return a << (twos_a < twos_b ? twos_a : twos_b);
}

/* Primality test by trial division with the odd numbers 3, 5, 7, ...
   Returns 1 if t is prime, 0 otherwise (including t = 0 and t = 1).  */
static int isprime (t) unsigned long int t;
{
  unsigned long int d, quot;

  if (t < 3)
    return t == 2;
  if ((t & 1) == 0)
    return 0;

  d = 3;
  for (;;)
    {
      quot = t / d;
      /* the divisor has passed the square root: no factor exists */
      if (quot < d)
	return 1;
      if (t - quot * d == 0)
	return 0;
      d += 2;
    }
}
