gcc/gmp/mpn/generic/powm.c - native_client/nacl-toolchain - Git at Google

 /* mpn_powm -- Compute R = U^E mod M.

 Copyright 2007, 2008, 2009 Free Software Foundation, Inc.

 This file is part of the GNU MP Library.

 The GNU MP Library is free software; you can redistribute it and/or modify
 it under the terms of the GNU Lesser General Public License as published by
 the Free Software Foundation; either version 3 of the License, or (at your
 option) any later version.

 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 License for more details.

 You should have received a copy of the GNU Lesser General Public License
 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


 /*
   BASIC ALGORITHM, Compute b^e mod n, where n is odd.

   1. w <- b

   2. While w^2 < n (and there are more bits in e)
        w <- power left-to-right base-2 without reduction

   3. t <- (B^n * b) mod n              Convert to REDC form

   4. Compute power table of e-dependent size

   5. While there are more bits in e
        w <- power left-to-right base-k with reduction


   TODO:

    * Make getbits a macro, thereby allowing it to update the index operand.
      That will simplify the code using getbits.  (Perhaps make getbits' sibling
      getbit then have similar form, for symmetry.)

    * Write an itch function.

    * Choose window size without looping.  (Superoptimize or think(tm).)

    * How do we handle small bases?

    * This is slower than old mpz code, in particular if we base it on redc_1
      (use: #undef HAVE_NATIVE_mpn_addmul_2).  Why?

    * Make it sub-quadratic.

    * Call new division functions, not mpn_tdiv_qr.

    * Is redc obsolete with improved SB division?

    * Consider special code for one-limb M.

    * CRT for N = odd*2^t:
       Using Newton's method and 2-adic arithmetic:
         m1_inv_m2 = 1/odd mod 2^t
       Plain 2-adic (REDC) modexp:
         r1 = a ^ b mod odd
       Mullo+sqrlo-based modexp:
         r2 = a ^ b mod 2^t
       mullo, mul, add:
         r = ((r2 - r1) * m1_i_m2 mod 2^t) * odd + r1

    * How should we handle the redc1/redc2/redc3/redc4/redc_subquad choice?
      - redc1: T(binvert_1limb)  + e * (n)   * (T(mullo1x1) + n*T(addmul_1))
      - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo2x2) + n*T(addmul_2))
      - redc3: T(binvert_3limbs) + e * (n/3) * (T(mullo3x3) + n*T(addmul_3))
      This disregards the addmul_N constant term, but we could think of
      that as part of the respective mulloNxN.
 */

 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"


 /* getbit (p, bi) -- value (0 or 1) of bit number bi of the limb array p.
    Bits are numbered from 1 at the least significant end, so bit bi is found
    at position (bi-1) % GMP_LIMB_BITS of limb (bi-1) / GMP_LIMB_BITS.
    Both arguments are fully parenthesized so that expression arguments
    (e.g. "base + 1" or "1 << k") expand as intended.  Note that bi is
    evaluated twice, so it must not have side effects.  */
 #define getbit(p,bi) \
   (((p)[((bi) - 1) / GMP_LIMB_BITS] >> ((bi) - 1) % GMP_LIMB_BITS) & 1)

 /* getbits (p, bi, nbits) -- extract a field of exponent bits from the limb
    array p.  bi is the number of bits that lie at or below the top of the
    wanted field.  When fewer than nbits bits remain (bi < nbits), the low bi
    bits of p[0] are returned.  Otherwise the nbits bits occupying positions
    bi-nbits .. bi-1 are returned, assembled from two adjacent limbs when the
    field crosses a limb boundary.  */
 static inline mp_limb_t
 getbits (const mp_limb_t *p, unsigned long bi, int nbits)
 {
   unsigned long low_bit;	/* index of the lowest bit of the field */
   mp_size_t limb_idx;		/* limb holding that lowest bit */
   int shift;			/* position of the lowest bit inside its limb */
   int avail;			/* field bits supplied by the first limb */
   mp_limb_t field;

   /* Not enough bits left for a full field: hand back what remains.  */
   if (bi < nbits)
     return p[0] & (((mp_limb_t) 1 << bi) - 1);

   low_bit = bi - nbits;
   limb_idx = low_bit / GMP_LIMB_BITS;
   shift = low_bit % GMP_LIMB_BITS;

   field = p[limb_idx] >> shift;
   avail = GMP_LIMB_BITS - shift;

   /* The field straddles a limb boundary; pull the rest from the next limb.  */
   if (avail < nbits)
     field += p[limb_idx + 1] << avail;

   return field & (((mp_limb_t) 1 << nbits) - 1);
 }

 /* Discard any native addmul_2 indication.  With the macro undefined, the
    block right below pins REDC_2_THRESHOLD to MP_SIZE_T_MAX, and code guarded
    by "#if defined (HAVE_NATIVE_mpn_addmul_2)" later in this file is compiled
    out.  */
 #undef HAVE_NATIVE_mpn_addmul_2

 #ifndef HAVE_NATIVE_mpn_addmul_2
 #define REDC_2_THRESHOLD		MP_SIZE_T_MAX
 #endif

 /* Fallback default.  It cannot take effect while the #undef above is in
    place, because REDC_2_THRESHOLD has then always been defined already.  */
 #ifndef REDC_2_THRESHOLD
 #define REDC_2_THRESHOLD		4
 #endif

 /* Stand-in for a general n-limb REDC routine.  Its whole body is
    ASSERT_ALWAYS (0), so reaching it is treated as a failure.  The parameter
    list is left empty (no prototype) so that the five-argument call made by
    MPN_REDC_X is accepted by the compiler.  */
 static void
 mpn_redc_n ()
 {
   ASSERT_ALWAYS (0);
 }

 /* win_size (eb) -- pick the window size for an exponent of eb bits.
    Returns the index of the first table entry that is not smaller than eb,
    i.e. the number of leading limits that eb strictly exceeds.  The final
    ~0ul entry is a sentinel that always stops the scan, so the result lies
    in the range 0..10.  */
 static inline int
 win_size (unsigned long eb)
 {
   static const unsigned long limits[] =
     {1,7,25,81,241,673,1793,4609,11521,28161,~0ul};
   int w = 0;

   while (eb > limits[w])
     w++;

   return w;
 }

 /* MPN_REDC_X (rp, tp, mp, n, mip) -- run the REDC routine selected by the
    variable redc_x, which must be in scope where the macro is used:
      redc_x == 1   mpn_redc_1, passing only the single limb mip[0];
      redc_x == 2   mpn_redc_2, passing the mip array;
      otherwise     mpn_redc_n.  */
 #define MPN_REDC_X(rp, tp, mp, n, mip)                                 \
   do {                                                                  \
     switch (redc_x)                                                     \
       {                                                                 \
       case 1:                                                           \
         mpn_redc_1 (rp, tp, mp, n, mip[0]);                             \
         break;                                                          \
       case 2:                                                           \
         mpn_redc_2 (rp, tp, mp, n, mip);                                \
         break;                                                          \
       default:                                                          \
         mpn_redc_n (rp, tp, mp, n, mip);                                \
         break;                                                          \
       }                                                                 \
   } while (0)

   /* redcify -- convert U to REDC form, U_r = B^n * U mod M.
      The numerator B^n * U is built in temporary storage as n zero limbs
      followed by the un limbs of up; it is then divided by mp[n-1..0] with
      mpn_tdiv_qr.  The remainder is stored at rp and the quotient is
      discarded together with the rest of the temporary storage.  */
 static void
 redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
 {
   mp_ptr num, quot;
   mp_size_t num_size = un + n;
   TMP_DECL;

   TMP_MARK;
   num = TMP_ALLOC_LIMBS (num_size);
   quot = TMP_ALLOC_LIMBS (un + 1);	/* FIXME: Put at num+? */

   /* High part: the operand itself.  Low part: n zero limbs, which amounts
      to the multiplication by B^n.  */
   MPN_COPY (num + n, up, un);
   MPN_ZERO (num, n);

   mpn_tdiv_qr (quot, rp, 0L, num, num_size, mp, n);

   TMP_FREE;
 }

 /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
    Requires that mp[n-1..0] is odd.
    Requires that ep[en-1..0] is > 1.
    Uses scratch space tp[3n..0], i.e., 3n+1 words.

    Parameters:
      rp       destination, n limbs.
      bp, bn   base and its size in limbs.
      ep, en   exponent and its size in limbs.  The high limb ep[en-1] is
               handed to count_leading_zeros, so it is presumably required
               to be nonzero -- confirm against longlong.h.
      mp, n    modulus and its size in limbs.
      tp       caller-supplied scratch.  The code visible here writes the
               double-length products to tp[0..2n-1] and keeps b^2 in the n
               limbs starting at tp + 2n.

    Method: left-to-right sliding-window exponentiation.  The base is first
    converted with redcify, every product is reduced with MPN_REDC_X, and a
    table of the odd powers of the base supplies the window multiplications.
    One last MPN_REDC_X of the zero-extended result, followed by a conditional
    subtraction of mp, produces the value stored at rp.  */
 void
 mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 	  mp_srcptr ep, mp_size_t en,
 	  mp_srcptr mp, mp_size_t n, mp_ptr tp)
 {
   mp_limb_t mip[2];
   int cnt;
   long ebi;
   int windowsize, this_windowsize;
   mp_limb_t expbits;
   mp_ptr pp, this_pp, last_pp;
   mp_ptr b2p;
   long i;
   int redc_x;
   TMP_DECL;

   ASSERT (en > 1 || (en == 1 && ep[0] > 1));
   ASSERT (n >= 1 && ((mp[0] & 1) != 0));

   TMP_MARK;

   /* ebi = number of significant bits in the exponent.  From here on ebi is
      the count of exponent bits not yet consumed; getbit (ep, ebi) reads the
      most significant of those.  */
   count_leading_zeros (cnt, ep[en - 1]);
   ebi = en * GMP_LIMB_BITS - cnt;

   /* Disabled sketch of step 2 of the algorithm outline (squarings without
      reduction).  It uses names that are not declared in this function
      (e.g. tn) and is never compiled.  */
 #if 0
   if (bn < n)
     {
       /* Do the first few exponent bits without mod reductions,
 	 until the result is greater than the mod argument.  */
       for (;;)
 	{
 	  mpn_sqr_n (tp, this_pp, tn);
 	  tn = tn * 2 - 1,  tn += tp[tn] != 0;
 	  if (getbit (ep, ebi) != 0)
 	    mpn_mul (..., tp, tn, bp, bn);
 	  ebi--;
 	}
     }
 #endif

   /* The exponent is > 1, hence ebi >= 2 and win_size returns at least 1;
      the shifts by (windowsize - 1) below rely on that.  */
   windowsize = win_size (ebi);

   /* Select the REDC variant and prepare its mip argument: the value from
      binvert_limb / mpn_binvert is negated before use.
      NOTE(review): redc_x is assigned only inside these branches.  When the
      addmul_2 branch is compiled out, correctness depends on BELOW_THRESHOLD
      always being true for the REDC_2_THRESHOLD in effect -- confirm in
      gmp-impl.h.  */
   if (BELOW_THRESHOLD (n, REDC_2_THRESHOLD))
     {
       binvert_limb (mip[0], mp[0]);
       mip[0] = -mip[0];
       redc_x = 1;
     }
 #if defined (HAVE_NATIVE_mpn_addmul_2)
   else
     {
       mpn_binvert (mip, mp, 2, tp);
       mip[0] = -mip[0]; mip[1] = ~mip[1];
       redc_x = 2;
     }
 #endif
 #if 0
   mpn_binvert (mip, mp, n, tp);
   redc_x = 0;
 #endif

   /* Power table: 2^(windowsize-1) entries of n limbs each.  Entry j will
      hold b^(2j+1) in REDC form.  */
   pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));

   /* Entry 0 is the base itself, converted to REDC form.  */
   this_pp = pp;
   redcify (this_pp, bp, bn, mp, n);

   b2p = tp + 2*n;

   /* Store b^2 in b2.  */
   mpn_sqr_n (tp, this_pp, n);
   MPN_REDC_X (b2p, tp, mp, n, mip);

   /* Precompute odd powers of b and put them in the temporary area at pp.  */
   /* Each new entry is the previous entry times b^2.  */
   for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
     {
       last_pp = this_pp;
       this_pp += n;
       mpn_mul_n (tp, last_pp, b2p, n);
       MPN_REDC_X (this_pp, tp, mp, n, mip);
     }

   /* First window: take the top windowsize bits of the exponent.  */
   expbits = getbits (ep, ebi, windowsize);
   ebi -= windowsize;
   if (ebi < 0)
     ebi = 0;

   /* Strip trailing zero bits from the window and give them back to ebi, so
      that expbits is odd and can index the table of odd powers.  */
   count_trailing_zeros (cnt, expbits);
   ebi += cnt;
   expbits >>= cnt;

   /* Start the accumulator with b^expbits, i.e. table entry expbits >> 1.  */
   MPN_COPY (rp, pp + n * (expbits >> 1), n);

   while (ebi != 0)
     {
       /* One squaring per zero bit; finish when the exponent is used up.  */
       while (getbit (ep, ebi) == 0)
 	{
 	  mpn_sqr_n (tp, rp, n);
 	  MPN_REDC_X (rp, tp, mp, n, mip);
 	  ebi--;
 	  if (ebi == 0)
 	    goto done;
 	}

       /* The next bit of the exponent is 1.  Now extract the largest block of
 	 bits <= windowsize, and such that the least significant bit is 1.  */

       expbits = getbits (ep, ebi, windowsize);
       ebi -= windowsize;
       this_windowsize = windowsize;
       if (ebi < 0)
 	{
 	  /* Fewer than windowsize bits were left; shrink the window.  */
 	  this_windowsize += ebi;
 	  ebi = 0;
 	}

       /* Drop trailing zero bits from the window and leave them for the
 	 squaring loop above.  */
       count_trailing_zeros (cnt, expbits);
       this_windowsize -= cnt;
       ebi += cnt;
       expbits >>= cnt;

       /* Square once per bit of the window ...  */
       do
 	{
 	  mpn_sqr_n (tp, rp, n);
 	  MPN_REDC_X (rp, tp, mp, n, mip);
 	  this_windowsize--;
 	}
       while (this_windowsize != 0);

       /* ... then multiply by the odd power selected by the window.  */
       mpn_mul_n (tp, rp, pp + n * (expbits >> 1), n);
       MPN_REDC_X (rp, tp, mp, n, mip);
     }

  done:
   /* Apply REDC once more to the result extended with n zero limbs, then
      subtract mp if the value is not already below it.  */
   MPN_COPY (tp, rp, n);
   MPN_ZERO (tp + n, n);
   MPN_REDC_X (rp, tp, mp, n, mip);
   if (mpn_cmp (rp, mp, n) >= 0)
     mpn_sub_n (rp, rp, mp, n);
   TMP_FREE;
 }
	/* mpn_powm -- Compute R = U^E mod M.

	Copyright 2007, 2008, 2009 Free Software Foundation, Inc.

	This file is part of the GNU MP Library.

	The GNU MP Library is free software; you can redistribute it and/or modify
	it under the terms of the GNU Lesser General Public License as published by
	the Free Software Foundation; either version 3 of the License, or (at your
	option) any later version.

	The GNU MP Library is distributed in the hope that it will be useful, but
	WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
	or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
	License for more details.

	You should have received a copy of the GNU Lesser General Public License
	along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */


	/*
	BASIC ALGORITHM, Compute b^e mod n, where n is odd.

	1. w <- b

	2. While w^2 < n (and there are more bits in e)
	w <- power left-to-right base-2 without reduction

	3. t <- (B^n * b) mod n Convert to REDC form

	4. Compute power table of e-dependent size

	5. While there are more bits in e
	w <- power left-to-right base-k with reduction


	TODO:

	* Make getbits a macro, thereby allowing it to update the index operand.
	That will simplify the code using getbits. (Perhaps make getbits' sibling
	getbit then have similar form, for symmetry.)

	* Write an itch function.

	* Choose window size without looping. (Superoptimize or think(tm).)

	* How do we handle small bases?

	* This is slower than old mpz code, in particular if we base it on redc_1
	(use: #undef HAVE_NATIVE_mpn_addmul_2). Why?

	* Make it sub-quadratic.

	* Call new division functions, not mpn_tdiv_qr.

	* Is redc obsolete with improved SB division?

	* Consider special code for one-limb M.

	* CRT for N = odd*2^t:
	Using Newton's method and 2-adic arithmetic:
	m1_inv_m2 = 1/odd mod 2^t
	Plain 2-adic (REDC) modexp:
	r1 = a ^ b mod odd
	Mullo+sqrlo-based modexp:
	r2 = a ^ b mod 2^t
	mullo, mul, add:
	r = ((r2 - r1) * m1_i_m2 mod 2^t) * odd + r1

	* How should we handle the redc1/redc2/redc3/redc4/redc_subquad choice?
	- redc1: T(binvert_1limb) + e * (n) * (T(mullo1x1) + n*T(addmul_1))
	- redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo2x2) + n*T(addmul_2))
	- redc3: T(binvert_3limbs) + e * (n/3) * (T(mullo3x3) + n*T(addmul_3))
	This disregards the addmul_N constant term, but we could think of
	that as part of the respective mulloNxN.
	*/

	#include "gmp.h"
	#include "gmp-impl.h"
	#include "longlong.h"


	/* getbit (p, bi) -- value (0 or 1) of bit number bi of the limb array p.
	   Bits are numbered from 1 at the least significant end, so bit bi is
	   found at position (bi-1) % GMP_LIMB_BITS of limb
	   (bi-1) / GMP_LIMB_BITS.  Both arguments are fully parenthesized so
	   that expression arguments (e.g. "base + 1" or "1 << k") expand as
	   intended.  Note that bi is evaluated twice, so it must not have side
	   effects.  */
	#define getbit(p,bi) \
	  (((p)[((bi) - 1) / GMP_LIMB_BITS] >> ((bi) - 1) % GMP_LIMB_BITS) & 1)

	/* getbits (p, bi, nbits) -- extract a field of exponent bits from the
	   limb array p.  bi is the number of bits that lie at or below the top
	   of the wanted field.  When fewer than nbits bits remain (bi < nbits),
	   the low bi bits of p[0] are returned.  Otherwise the nbits bits
	   occupying positions bi-nbits .. bi-1 are returned, assembled from two
	   adjacent limbs when the field crosses a limb boundary.  */
	static inline mp_limb_t
	getbits (const mp_limb_t *p, unsigned long bi, int nbits)
	{
	  unsigned long low_bit;  /* index of the lowest bit of the field */
	  mp_size_t limb_idx;     /* limb holding that lowest bit */
	  int shift;              /* position of the lowest bit inside its limb */
	  int avail;              /* field bits supplied by the first limb */
	  mp_limb_t field;

	  /* Not enough bits left for a full field: hand back what remains.  */
	  if (bi < nbits)
	    return p[0] & (((mp_limb_t) 1 << bi) - 1);

	  low_bit = bi - nbits;
	  limb_idx = low_bit / GMP_LIMB_BITS;
	  shift = low_bit % GMP_LIMB_BITS;

	  field = p[limb_idx] >> shift;
	  avail = GMP_LIMB_BITS - shift;

	  /* The field straddles a limb boundary; pull the rest from the next
	     limb.  */
	  if (avail < nbits)
	    field += p[limb_idx + 1] << avail;

	  return field & (((mp_limb_t) 1 << nbits) - 1);
	}

	/* Discard any native addmul_2 indication.  With the macro undefined, the
	   block right below pins REDC_2_THRESHOLD to MP_SIZE_T_MAX, and code
	   guarded by "#if defined (HAVE_NATIVE_mpn_addmul_2)" later in this file
	   is compiled out.  */
	#undef HAVE_NATIVE_mpn_addmul_2

	#ifndef HAVE_NATIVE_mpn_addmul_2
	#define REDC_2_THRESHOLD MP_SIZE_T_MAX
	#endif

	/* Fallback default.  It cannot take effect while the #undef above is in
	   place, because REDC_2_THRESHOLD has then always been defined already.  */
	#ifndef REDC_2_THRESHOLD
	#define REDC_2_THRESHOLD 4
	#endif

	/* Stand-in for a general n-limb REDC routine.  Its whole body is
	   ASSERT_ALWAYS (0), so reaching it is treated as a failure.  The
	   parameter list is left empty (no prototype) so that the five-argument
	   call made by MPN_REDC_X is accepted by the compiler.  */
	static void
	mpn_redc_n ()
	{
	  ASSERT_ALWAYS (0);
	}

	/* win_size (eb) -- pick the window size for an exponent of eb bits.
	   Returns the index of the first table entry that is not smaller than
	   eb, i.e. the number of leading limits that eb strictly exceeds.  The
	   final ~0ul entry is a sentinel that always stops the scan, so the
	   result lies in the range 0..10.  */
	static inline int
	win_size (unsigned long eb)
	{
	  static const unsigned long limits[] =
	    {1,7,25,81,241,673,1793,4609,11521,28161,~0ul};
	  int w = 0;

	  while (eb > limits[w])
	    w++;

	  return w;
	}

	/* MPN_REDC_X (rp, tp, mp, n, mip) -- run the REDC routine selected by the
	   variable redc_x, which must be in scope where the macro is used:
	     redc_x == 1   mpn_redc_1, passing only the single limb mip[0];
	     redc_x == 2   mpn_redc_2, passing the mip array;
	     otherwise     mpn_redc_n.  */
	#define MPN_REDC_X(rp, tp, mp, n, mip)                          \
	  do {                                                          \
	    switch (redc_x)                                             \
	      {                                                         \
	      case 1:                                                   \
	        mpn_redc_1 (rp, tp, mp, n, mip[0]);                     \
	        break;                                                  \
	      case 2:                                                   \
	        mpn_redc_2 (rp, tp, mp, n, mip);                        \
	        break;                                                  \
	      default:                                                  \
	        mpn_redc_n (rp, tp, mp, n, mip);                        \
	        break;                                                  \
	      }                                                         \
	  } while (0)

	/* redcify -- convert U to REDC form, U_r = B^n * U mod M.
	   The numerator B^n * U is built in temporary storage as n zero limbs
	   followed by the un limbs of up; it is then divided by mp[n-1..0] with
	   mpn_tdiv_qr.  The remainder is stored at rp and the quotient is
	   discarded together with the rest of the temporary storage.  */
	static void
	redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
	{
	  mp_ptr num, quot;
	  mp_size_t num_size = un + n;
	  TMP_DECL;

	  TMP_MARK;
	  num = TMP_ALLOC_LIMBS (num_size);
	  quot = TMP_ALLOC_LIMBS (un + 1); /* FIXME: Put at num+? */

	  /* High part: the operand itself.  Low part: n zero limbs, which
	     amounts to the multiplication by B^n.  */
	  MPN_COPY (num + n, up, un);
	  MPN_ZERO (num, n);

	  mpn_tdiv_qr (quot, rp, 0L, num, num_size, mp, n);

	  TMP_FREE;
	}

	/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
	   Requires that mp[n-1..0] is odd.
	   Requires that ep[en-1..0] is > 1.
	   Uses scratch space tp[3n..0], i.e., 3n+1 words.

	   Parameters:
	     rp       destination, n limbs.
	     bp, bn   base and its size in limbs.
	     ep, en   exponent and its size in limbs.  The high limb ep[en-1] is
	              handed to count_leading_zeros, so it is presumably required
	              to be nonzero -- confirm against longlong.h.
	     mp, n    modulus and its size in limbs.
	     tp       caller-supplied scratch.  The code visible here writes the
	              double-length products to tp[0..2n-1] and keeps b^2 in the
	              n limbs starting at tp + 2n.

	   Method: left-to-right sliding-window exponentiation.  The base is
	   first converted with redcify, every product is reduced with
	   MPN_REDC_X, and a table of the odd powers of the base supplies the
	   window multiplications.  One last MPN_REDC_X of the zero-extended
	   result, followed by a conditional subtraction of mp, produces the
	   value stored at rp.  */
	void
	mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
	          mp_srcptr ep, mp_size_t en,
	          mp_srcptr mp, mp_size_t n, mp_ptr tp)
	{
	  mp_limb_t mip[2];
	  int cnt;
	  long ebi;
	  int windowsize, this_windowsize;
	  mp_limb_t expbits;
	  mp_ptr pp, this_pp, last_pp;
	  mp_ptr b2p;
	  long i;
	  int redc_x;
	  TMP_DECL;

	  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
	  ASSERT (n >= 1 && ((mp[0] & 1) != 0));

	  TMP_MARK;

	  /* ebi = number of significant bits in the exponent.  From here on ebi
	     is the count of exponent bits not yet consumed; getbit (ep, ebi)
	     reads the most significant of those.  */
	  count_leading_zeros (cnt, ep[en - 1]);
	  ebi = en * GMP_LIMB_BITS - cnt;

	  /* Disabled sketch of step 2 of the algorithm outline (squarings
	     without reduction).  It uses names that are not declared in this
	     function (e.g. tn) and is never compiled.  */
	#if 0
	  if (bn < n)
	    {
	      /* Do the first few exponent bits without mod reductions,
	         until the result is greater than the mod argument.  */
	      for (;;)
	        {
	          mpn_sqr_n (tp, this_pp, tn);
	          tn = tn * 2 - 1, tn += tp[tn] != 0;
	          if (getbit (ep, ebi) != 0)
	            mpn_mul (..., tp, tn, bp, bn);
	          ebi--;
	        }
	    }
	#endif

	  /* The exponent is > 1, hence ebi >= 2 and win_size returns at least 1;
	     the shifts by (windowsize - 1) below rely on that.  */
	  windowsize = win_size (ebi);

	  /* Select the REDC variant and prepare its mip argument: the value from
	     binvert_limb / mpn_binvert is negated before use.
	     NOTE(review): redc_x is assigned only inside these branches.  When
	     the addmul_2 branch is compiled out, correctness depends on
	     BELOW_THRESHOLD always being true for the REDC_2_THRESHOLD in
	     effect -- confirm in gmp-impl.h.  */
	  if (BELOW_THRESHOLD (n, REDC_2_THRESHOLD))
	    {
	      binvert_limb (mip[0], mp[0]);
	      mip[0] = -mip[0];
	      redc_x = 1;
	    }
	#if defined (HAVE_NATIVE_mpn_addmul_2)
	  else
	    {
	      mpn_binvert (mip, mp, 2, tp);
	      mip[0] = -mip[0]; mip[1] = ~mip[1];
	      redc_x = 2;
	    }
	#endif
	#if 0
	  mpn_binvert (mip, mp, n, tp);
	  redc_x = 0;
	#endif

	  /* Power table: 2^(windowsize-1) entries of n limbs each.  Entry j will
	     hold b^(2j+1) in REDC form.  */
	  pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));

	  /* Entry 0 is the base itself, converted to REDC form.  */
	  this_pp = pp;
	  redcify (this_pp, bp, bn, mp, n);

	  b2p = tp + 2*n;

	  /* Store b^2 in b2.  */
	  mpn_sqr_n (tp, this_pp, n);
	  MPN_REDC_X (b2p, tp, mp, n, mip);

	  /* Precompute odd powers of b and put them in the temporary area at
	     pp.  Each new entry is the previous entry times b^2.  */
	  for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
	    {
	      last_pp = this_pp;
	      this_pp += n;
	      mpn_mul_n (tp, last_pp, b2p, n);
	      MPN_REDC_X (this_pp, tp, mp, n, mip);
	    }

	  /* First window: take the top windowsize bits of the exponent.  */
	  expbits = getbits (ep, ebi, windowsize);
	  ebi -= windowsize;
	  if (ebi < 0)
	    ebi = 0;

	  /* Strip trailing zero bits from the window and give them back to ebi,
	     so that expbits is odd and can index the table of odd powers.  */
	  count_trailing_zeros (cnt, expbits);
	  ebi += cnt;
	  expbits >>= cnt;

	  /* Start the accumulator with b^expbits, i.e. table entry
	     expbits >> 1.  */
	  MPN_COPY (rp, pp + n * (expbits >> 1), n);

	  while (ebi != 0)
	    {
	      /* One squaring per zero bit; finish when the exponent is used
	         up.  */
	      while (getbit (ep, ebi) == 0)
	        {
	          mpn_sqr_n (tp, rp, n);
	          MPN_REDC_X (rp, tp, mp, n, mip);
	          ebi--;
	          if (ebi == 0)
	            goto done;
	        }

	      /* The next bit of the exponent is 1.  Now extract the largest
	         block of bits <= windowsize, and such that the least significant
	         bit is 1.  */

	      expbits = getbits (ep, ebi, windowsize);
	      ebi -= windowsize;
	      this_windowsize = windowsize;
	      if (ebi < 0)
	        {
	          /* Fewer than windowsize bits were left; shrink the window.  */
	          this_windowsize += ebi;
	          ebi = 0;
	        }

	      /* Drop trailing zero bits from the window and leave them for the
	         squaring loop above.  */
	      count_trailing_zeros (cnt, expbits);
	      this_windowsize -= cnt;
	      ebi += cnt;
	      expbits >>= cnt;

	      /* Square once per bit of the window ...  */
	      do
	        {
	          mpn_sqr_n (tp, rp, n);
	          MPN_REDC_X (rp, tp, mp, n, mip);
	          this_windowsize--;
	        }
	      while (this_windowsize != 0);

	      /* ... then multiply by the odd power selected by the window.  */
	      mpn_mul_n (tp, rp, pp + n * (expbits >> 1), n);
	      MPN_REDC_X (rp, tp, mp, n, mip);
	    }

	 done:
	  /* Apply REDC once more to the result extended with n zero limbs, then
	     subtract mp if the value is not already below it.  */
	  MPN_COPY (tp, rp, n);
	  MPN_ZERO (tp + n, n);
	  MPN_REDC_X (rp, tp, mp, n, mip);
	  if (mpn_cmp (rp, mp, n) >= 0)
	    mpn_sub_n (rp, rp, mp, n);
	  TMP_FREE;
	}