gcc/gmp/mpn/generic/sqrtrem.c - native_client/nacl-toolchain - Git at Google

 /* mpn_sqrtrem -- square root and remainder

    Contributed to the GNU project by Paul Zimmermann (most code) and
    Torbjorn Granlund (mpn_sqrtrem1).

    THE FUNCTIONS IN THIS FILE EXCEPT mpn_sqrtrem ARE INTERNAL WITH A
    MUTABLE INTERFACE.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED
    INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR
    DISAPPEAR IN A FUTURE GMP RELEASE.

 Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2008 Free Software Foundation,
 Inc.

 This file is part of the GNU MP Library.

 The GNU MP Library is free software; you can redistribute it and/or modify
 it under the terms of the GNU Lesser General Public License as published by
 the Free Software Foundation; either version 3 of the License, or (at your
 option) any later version.

 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 License for more details.

 You should have received a copy of the GNU Lesser General Public License
 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


 /* See "Karatsuba Square Root", reference in gmp.texi.  */


 #include <stdio.h>
 #include <stdlib.h>

 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"

 static const unsigned short invsqrttab[384] =
 {
   0x1ff,0x1fd,0x1fb,0x1f9,0x1f7,0x1f5,0x1f3,0x1f2, /* sqrt(1/80)..sqrt(1/87) */
   0x1f0,0x1ee,0x1ec,0x1ea,0x1e9,0x1e7,0x1e5,0x1e4, /* sqrt(1/88)..sqrt(1/8f) */
   0x1e2,0x1e0,0x1df,0x1dd,0x1db,0x1da,0x1d8,0x1d7, /* sqrt(1/90)..sqrt(1/97) */
   0x1d5,0x1d4,0x1d2,0x1d1,0x1cf,0x1ce,0x1cc,0x1cb, /* sqrt(1/98)..sqrt(1/9f) */
   0x1c9,0x1c8,0x1c6,0x1c5,0x1c4,0x1c2,0x1c1,0x1c0, /* sqrt(1/a0)..sqrt(1/a7) */
   0x1be,0x1bd,0x1bc,0x1ba,0x1b9,0x1b8,0x1b7,0x1b5, /* sqrt(1/a8)..sqrt(1/af) */
   0x1b4,0x1b3,0x1b2,0x1b0,0x1af,0x1ae,0x1ad,0x1ac, /* sqrt(1/b0)..sqrt(1/b7) */
   0x1aa,0x1a9,0x1a8,0x1a7,0x1a6,0x1a5,0x1a4,0x1a3, /* sqrt(1/b8)..sqrt(1/bf) */
   0x1a2,0x1a0,0x19f,0x19e,0x19d,0x19c,0x19b,0x19a, /* sqrt(1/c0)..sqrt(1/c7) */
   0x199,0x198,0x197,0x196,0x195,0x194,0x193,0x192, /* sqrt(1/c8)..sqrt(1/cf) */
   0x191,0x190,0x18f,0x18e,0x18d,0x18c,0x18c,0x18b, /* sqrt(1/d0)..sqrt(1/d7) */
   0x18a,0x189,0x188,0x187,0x186,0x185,0x184,0x183, /* sqrt(1/d8)..sqrt(1/df) */
   0x183,0x182,0x181,0x180,0x17f,0x17e,0x17e,0x17d, /* sqrt(1/e0)..sqrt(1/e7) */
   0x17c,0x17b,0x17a,0x179,0x179,0x178,0x177,0x176, /* sqrt(1/e8)..sqrt(1/ef) */
   0x176,0x175,0x174,0x173,0x172,0x172,0x171,0x170, /* sqrt(1/f0)..sqrt(1/f7) */
   0x16f,0x16f,0x16e,0x16d,0x16d,0x16c,0x16b,0x16a, /* sqrt(1/f8)..sqrt(1/ff) */
   0x16a,0x169,0x168,0x168,0x167,0x166,0x166,0x165, /* sqrt(1/100)..sqrt(1/107) */
   0x164,0x164,0x163,0x162,0x162,0x161,0x160,0x160, /* sqrt(1/108)..sqrt(1/10f) */
   0x15f,0x15e,0x15e,0x15d,0x15c,0x15c,0x15b,0x15a, /* sqrt(1/110)..sqrt(1/117) */
   0x15a,0x159,0x159,0x158,0x157,0x157,0x156,0x156, /* sqrt(1/118)..sqrt(1/11f) */
   0x155,0x154,0x154,0x153,0x153,0x152,0x152,0x151, /* sqrt(1/120)..sqrt(1/127) */
   0x150,0x150,0x14f,0x14f,0x14e,0x14e,0x14d,0x14d, /* sqrt(1/128)..sqrt(1/12f) */
   0x14c,0x14b,0x14b,0x14a,0x14a,0x149,0x149,0x148, /* sqrt(1/130)..sqrt(1/137) */
   0x148,0x147,0x147,0x146,0x146,0x145,0x145,0x144, /* sqrt(1/138)..sqrt(1/13f) */
   0x144,0x143,0x143,0x142,0x142,0x141,0x141,0x140, /* sqrt(1/140)..sqrt(1/147) */
   0x140,0x13f,0x13f,0x13e,0x13e,0x13d,0x13d,0x13c, /* sqrt(1/148)..sqrt(1/14f) */
   0x13c,0x13b,0x13b,0x13a,0x13a,0x139,0x139,0x139, /* sqrt(1/150)..sqrt(1/157) */
   0x138,0x138,0x137,0x137,0x136,0x136,0x135,0x135, /* sqrt(1/158)..sqrt(1/15f) */
   0x135,0x134,0x134,0x133,0x133,0x132,0x132,0x132, /* sqrt(1/160)..sqrt(1/167) */
   0x131,0x131,0x130,0x130,0x12f,0x12f,0x12f,0x12e, /* sqrt(1/168)..sqrt(1/16f) */
   0x12e,0x12d,0x12d,0x12d,0x12c,0x12c,0x12b,0x12b, /* sqrt(1/170)..sqrt(1/177) */
   0x12b,0x12a,0x12a,0x129,0x129,0x129,0x128,0x128, /* sqrt(1/178)..sqrt(1/17f) */
   0x127,0x127,0x127,0x126,0x126,0x126,0x125,0x125, /* sqrt(1/180)..sqrt(1/187) */
   0x124,0x124,0x124,0x123,0x123,0x123,0x122,0x122, /* sqrt(1/188)..sqrt(1/18f) */
   0x121,0x121,0x121,0x120,0x120,0x120,0x11f,0x11f, /* sqrt(1/190)..sqrt(1/197) */
   0x11f,0x11e,0x11e,0x11e,0x11d,0x11d,0x11d,0x11c, /* sqrt(1/198)..sqrt(1/19f) */
   0x11c,0x11b,0x11b,0x11b,0x11a,0x11a,0x11a,0x119, /* sqrt(1/1a0)..sqrt(1/1a7) */
   0x119,0x119,0x118,0x118,0x118,0x118,0x117,0x117, /* sqrt(1/1a8)..sqrt(1/1af) */
   0x117,0x116,0x116,0x116,0x115,0x115,0x115,0x114, /* sqrt(1/1b0)..sqrt(1/1b7) */
   0x114,0x114,0x113,0x113,0x113,0x112,0x112,0x112, /* sqrt(1/1b8)..sqrt(1/1bf) */
   0x112,0x111,0x111,0x111,0x110,0x110,0x110,0x10f, /* sqrt(1/1c0)..sqrt(1/1c7) */
   0x10f,0x10f,0x10f,0x10e,0x10e,0x10e,0x10d,0x10d, /* sqrt(1/1c8)..sqrt(1/1cf) */
   0x10d,0x10c,0x10c,0x10c,0x10c,0x10b,0x10b,0x10b, /* sqrt(1/1d0)..sqrt(1/1d7) */
   0x10a,0x10a,0x10a,0x10a,0x109,0x109,0x109,0x109, /* sqrt(1/1d8)..sqrt(1/1df) */
   0x108,0x108,0x108,0x107,0x107,0x107,0x107,0x106, /* sqrt(1/1e0)..sqrt(1/1e7) */
   0x106,0x106,0x106,0x105,0x105,0x105,0x104,0x104, /* sqrt(1/1e8)..sqrt(1/1ef) */
   0x104,0x104,0x103,0x103,0x103,0x103,0x102,0x102, /* sqrt(1/1f0)..sqrt(1/1f7) */
   0x102,0x102,0x101,0x101,0x101,0x101,0x100,0x100  /* sqrt(1/1f8)..sqrt(1/1ff) */
 };

 /* Compute s = floor(sqrt(a0)), and *rp = a0 - s^2.  */

 #if GMP_NUMB_BITS > 32
 #define MAGIC 0x10000000000	/* 0xffe7debbfc < MAGIC < 0x232b1850f410 */
 #else
 #define MAGIC 0x100000		/* 0xfee6f < MAGIC < 0x29cbc8 */
 #endif

 static mp_limb_t
 mpn_sqrtrem1 (mp_ptr rp, mp_limb_t a0)
 {
 #if GMP_NUMB_BITS > 32
   mp_limb_t a1;
 #endif
   mp_limb_t x0, t2, t, x2;
   unsigned abits;

   ASSERT_ALWAYS (GMP_NAIL_BITS == 0);
   ASSERT_ALWAYS (GMP_LIMB_BITS == 32 || GMP_LIMB_BITS == 64);
   ASSERT (a0 >= GMP_NUMB_HIGHBIT / 2);

   /* Use Newton iterations for approximating 1/sqrt(a) instead of sqrt(a),
      since we can do the former without division.  As part of the last
      iteration convert from 1/sqrt(a) to sqrt(a).  */

   abits = a0 >> (GMP_LIMB_BITS - 1 - 8);	/* extract bits for table lookup */
   x0 = invsqrttab[abits - 0x80];		/* initial 1/sqrt(a) */

   /* x0 is now an 8 bits approximation of 1/sqrt(a0) */

 #if GMP_NUMB_BITS > 32
   a1 = a0 >> (GMP_LIMB_BITS - 1 - 32);
   t = (mp_limb_signed_t) (0x2000000000000l - 0x30000  - a1 * x0 * x0) >> 16;
   x0 = (x0 << 16) + ((mp_limb_signed_t) (x0 * t) >> (16+2));

   /* x0 is now an 16 bits approximation of 1/sqrt(a0) */

   t2 = x0 * (a0 >> (32-8));
   t = t2 >> 25;
   t = ((mp_limb_signed_t) ((a0 << 14) - t * t - MAGIC) >> (32-8));
   x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 15);
   x0 >>= 32;
 #else
   t2 = x0 * (a0 >> (16-8));
   t = t2 >> 13;
   t = ((mp_limb_signed_t) ((a0 << 6) - t * t - MAGIC) >> (16-8));
   x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 7);
   x0 >>= 16;
 #endif

   /* x0 is now a full limb approximation of sqrt(a0) */

   x2 = x0 * x0;
   if (x2 + 2*x0 <= a0 - 1)
     {
       x2 += 2*x0 + 1;
       x0++;
     }

   *rp = a0 - x2;
   return x0;
 }


 #define Prec (GMP_NUMB_BITS >> 1)

 /* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized
    return cc such that {np, 2} = sp[0]^2 + cc*2^GMP_NUMB_BITS + rp[0] */
 static mp_limb_t
 mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
 {
   mp_limb_t qhl, q, u, np0, sp0, rp0, q2;
   int cc;

   ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);

   np0 = np[0];
   sp0 = mpn_sqrtrem1 (rp, np[1]);
   qhl = 0;
   rp0 = rp[0];
   while (rp0 >= sp0)
     {
       qhl++;
       rp0 -= sp0;
     }
   /* now rp0 < sp0 < 2^Prec */
   rp0 = (rp0 << Prec) + (np0 >> Prec);
   u = 2 * sp0;
   q = rp0 / u;
   u = rp0 - q * u;
   q += (qhl & 1) << (Prec - 1);
   qhl >>= 1; /* if qhl=1, necessary q=0 as qhl*2^Prec + q <= 2^Prec */
   /* now we have (initial rp0)<<Prec + np0>>Prec = (qhl<<Prec + q) * (2sp0) + u */
   sp0 = ((sp0 + qhl) << Prec) + q;
   cc = u >> Prec;
   rp0 = ((u << Prec) & GMP_NUMB_MASK) + (np0 & (((mp_limb_t) 1 << Prec) - 1));
   /* subtract q * q or qhl*2^(2*Prec) from rp */
   q2 = q * q;
   cc -= (rp0 < q2) + qhl;
   rp0 -= q2;
   /* now subtract 2*q*2^Prec + 2^(2*Prec) if qhl is set */
   if (cc < 0)
     {
       if (sp0 != 0)
 	{
 	  rp0 += sp0;
 	  cc += rp0 < sp0;
 	}
       else
 	cc++;
       --sp0;
       rp0 += sp0;
       cc += rp0 < sp0;
     }

   rp[0] = rp0;
   sp[0] = sp0;
   return cc;
 }

 /* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},
    and in {np, n} the low n limbs of the remainder, returns the high
    limb of the remainder (which is 0 or 1).
    Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4
    where B=2^GMP_NUMB_BITS.  */
 static mp_limb_t
 mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n)
 {
   mp_limb_t q;			/* carry out of {sp, n} */
   int c, b;			/* carry out of remainder */
   mp_size_t l, h;

   ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);

   if (n == 1)
     c = mpn_sqrtrem2 (sp, np, np);
   else
     {
       l = n / 2;
       h = n - l;
       q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h);
       if (q != 0)
 	mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h);
       q += mpn_divrem (sp, 0, np + l, n, sp + l, h);
       c = sp[0] & 1;
       mpn_rshift (sp, sp, l, 1);
       sp[l - 1] |= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;
       q >>= 1;
       if (c != 0)
 	c = mpn_add_n (np + l, np + l, sp + l, h);
       mpn_sqr_n (np + n, sp, l);
       b = q + mpn_sub_n (np, np, np + n, 2 * l);
       c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, (mp_limb_t) b);
       q = mpn_add_1 (sp + l, sp + l, h, q);

       if (c < 0)
 	{
 	  c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;
 	  c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));
 	  q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));
 	}
     }

   return c;
 }


 mp_size_t
 mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)
 {
   mp_limb_t *tp, s0[1], cc, high, rl;
   int c;
   mp_size_t rn, tn;
   TMP_DECL;

   ASSERT (nn >= 0);
   ASSERT_MPN (np, nn);

   /* If OP is zero, both results are zero.  */
   if (nn == 0)
     return 0;

   ASSERT (np[nn - 1] != 0);
   ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nn));
   ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));
   ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));

   high = np[nn - 1];
   if (nn == 1 && (high & GMP_NUMB_HIGHBIT))
     {
       mp_limb_t r;
       sp[0] = mpn_sqrtrem1 (&r, high);
       if (rp != NULL)
 	rp[0] = r;
       return r != 0;
     }
   count_leading_zeros (c, high);
   c -= GMP_NAIL_BITS;

   c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */
   tn = (nn + 1) / 2; /* 2*tn is the smallest even integer >= nn */

   TMP_MARK;
   if (nn % 2 != 0 || c > 0)
     {
       tp = TMP_ALLOC_LIMBS (2 * tn);
       tp[0] = 0;	     /* needed only when 2*tn > nn, but saves a test */
       if (c != 0)
 	mpn_lshift (tp + 2 * tn - nn, np, nn, 2 * c);
       else
 	MPN_COPY (tp + 2 * tn - nn, np, nn);
       rl = mpn_dc_sqrtrem (sp, tp, tn);
       /* We have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*GMP_NUMB_BITS/2,
 	 thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */
       c += (nn % 2) * GMP_NUMB_BITS / 2;		/* c now represents k */
       s0[0] = sp[0] & (((mp_limb_t) 1 << c) - 1);	/* S mod 2^k */
       rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]);	/* R = R + 2*s0*S */
       cc = mpn_submul_1 (tp, s0, 1, s0[0]);
       rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;
       mpn_rshift (sp, sp, tn, c);
       tp[tn] = rl;
       if (rp == NULL)
 	rp = tp;
       c = c << 1;
       if (c < GMP_NUMB_BITS)
 	tn++;
       else
 	{
 	  tp++;
 	  c -= GMP_NUMB_BITS;
 	}
       if (c != 0)
 	mpn_rshift (rp, tp, tn, c);
       else
 	MPN_COPY_INCR (rp, tp, tn);
       rn = tn;
     }
   else
     {
       if (rp == NULL)
 	rp = TMP_ALLOC_LIMBS (nn);
       if (rp != np)
 	MPN_COPY (rp, np, nn);
       rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn));
     }

   MPN_NORMALIZE (rp, rn);

   TMP_FREE;
   return rn;
 }
	/* mpn_sqrtrem -- square root and remainder

	Contributed to the GNU project by Paul Zimmermann (most code) and
	Torbjorn Granlund (mpn_sqrtrem1).

	THE FUNCTIONS IN THIS FILE EXCEPT mpn_sqrtrem ARE INTERNAL WITH A
	MUTABLE INTERFACE. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED
	INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR
	DISAPPEAR IN A FUTURE GMP RELEASE.

	Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2008 Free Software Foundation,
	Inc.

	This file is part of the GNU MP Library.

	The GNU MP Library is free software; you can redistribute it and/or modify
	it under the terms of the GNU Lesser General Public License as published by
	the Free Software Foundation; either version 3 of the License, or (at your
	option) any later version.

	The GNU MP Library is distributed in the hope that it will be useful, but
	WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
	or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
	License for more details.

	You should have received a copy of the GNU Lesser General Public License
	along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */


	/* See "Karatsuba Square Root", reference in gmp.texi. */


	#include <stdio.h>
	#include <stdlib.h>

	#include "gmp.h"
	#include "gmp-impl.h"
	#include "longlong.h"

	static const unsigned short invsqrttab[384] =
	{
	0x1ff,0x1fd,0x1fb,0x1f9,0x1f7,0x1f5,0x1f3,0x1f2, /* sqrt(1/80)..sqrt(1/87) */
	0x1f0,0x1ee,0x1ec,0x1ea,0x1e9,0x1e7,0x1e5,0x1e4, /* sqrt(1/88)..sqrt(1/8f) */
	0x1e2,0x1e0,0x1df,0x1dd,0x1db,0x1da,0x1d8,0x1d7, /* sqrt(1/90)..sqrt(1/97) */
	0x1d5,0x1d4,0x1d2,0x1d1,0x1cf,0x1ce,0x1cc,0x1cb, /* sqrt(1/98)..sqrt(1/9f) */
	0x1c9,0x1c8,0x1c6,0x1c5,0x1c4,0x1c2,0x1c1,0x1c0, /* sqrt(1/a0)..sqrt(1/a7) */
	0x1be,0x1bd,0x1bc,0x1ba,0x1b9,0x1b8,0x1b7,0x1b5, /* sqrt(1/a8)..sqrt(1/af) */
	0x1b4,0x1b3,0x1b2,0x1b0,0x1af,0x1ae,0x1ad,0x1ac, /* sqrt(1/b0)..sqrt(1/b7) */
	0x1aa,0x1a9,0x1a8,0x1a7,0x1a6,0x1a5,0x1a4,0x1a3, /* sqrt(1/b8)..sqrt(1/bf) */
	0x1a2,0x1a0,0x19f,0x19e,0x19d,0x19c,0x19b,0x19a, /* sqrt(1/c0)..sqrt(1/c7) */
	0x199,0x198,0x197,0x196,0x195,0x194,0x193,0x192, /* sqrt(1/c8)..sqrt(1/cf) */
	0x191,0x190,0x18f,0x18e,0x18d,0x18c,0x18c,0x18b, /* sqrt(1/d0)..sqrt(1/d7) */
	0x18a,0x189,0x188,0x187,0x186,0x185,0x184,0x183, /* sqrt(1/d8)..sqrt(1/df) */
	0x183,0x182,0x181,0x180,0x17f,0x17e,0x17e,0x17d, /* sqrt(1/e0)..sqrt(1/e7) */
	0x17c,0x17b,0x17a,0x179,0x179,0x178,0x177,0x176, /* sqrt(1/e8)..sqrt(1/ef) */
	0x176,0x175,0x174,0x173,0x172,0x172,0x171,0x170, /* sqrt(1/f0)..sqrt(1/f7) */
	0x16f,0x16f,0x16e,0x16d,0x16d,0x16c,0x16b,0x16a, /* sqrt(1/f8)..sqrt(1/ff) */
	0x16a,0x169,0x168,0x168,0x167,0x166,0x166,0x165, /* sqrt(1/100)..sqrt(1/107) */
	0x164,0x164,0x163,0x162,0x162,0x161,0x160,0x160, /* sqrt(1/108)..sqrt(1/10f) */
	0x15f,0x15e,0x15e,0x15d,0x15c,0x15c,0x15b,0x15a, /* sqrt(1/110)..sqrt(1/117) */
	0x15a,0x159,0x159,0x158,0x157,0x157,0x156,0x156, /* sqrt(1/118)..sqrt(1/11f) */
	0x155,0x154,0x154,0x153,0x153,0x152,0x152,0x151, /* sqrt(1/120)..sqrt(1/127) */
	0x150,0x150,0x14f,0x14f,0x14e,0x14e,0x14d,0x14d, /* sqrt(1/128)..sqrt(1/12f) */
	0x14c,0x14b,0x14b,0x14a,0x14a,0x149,0x149,0x148, /* sqrt(1/130)..sqrt(1/137) */
	0x148,0x147,0x147,0x146,0x146,0x145,0x145,0x144, /* sqrt(1/138)..sqrt(1/13f) */
	0x144,0x143,0x143,0x142,0x142,0x141,0x141,0x140, /* sqrt(1/140)..sqrt(1/147) */
	0x140,0x13f,0x13f,0x13e,0x13e,0x13d,0x13d,0x13c, /* sqrt(1/148)..sqrt(1/14f) */
	0x13c,0x13b,0x13b,0x13a,0x13a,0x139,0x139,0x139, /* sqrt(1/150)..sqrt(1/157) */
	0x138,0x138,0x137,0x137,0x136,0x136,0x135,0x135, /* sqrt(1/158)..sqrt(1/15f) */
	0x135,0x134,0x134,0x133,0x133,0x132,0x132,0x132, /* sqrt(1/160)..sqrt(1/167) */
	0x131,0x131,0x130,0x130,0x12f,0x12f,0x12f,0x12e, /* sqrt(1/168)..sqrt(1/16f) */
	0x12e,0x12d,0x12d,0x12d,0x12c,0x12c,0x12b,0x12b, /* sqrt(1/170)..sqrt(1/177) */
	0x12b,0x12a,0x12a,0x129,0x129,0x129,0x128,0x128, /* sqrt(1/178)..sqrt(1/17f) */
	0x127,0x127,0x127,0x126,0x126,0x126,0x125,0x125, /* sqrt(1/180)..sqrt(1/187) */
	0x124,0x124,0x124,0x123,0x123,0x123,0x122,0x122, /* sqrt(1/188)..sqrt(1/18f) */
	0x121,0x121,0x121,0x120,0x120,0x120,0x11f,0x11f, /* sqrt(1/190)..sqrt(1/197) */
	0x11f,0x11e,0x11e,0x11e,0x11d,0x11d,0x11d,0x11c, /* sqrt(1/198)..sqrt(1/19f) */
	0x11c,0x11b,0x11b,0x11b,0x11a,0x11a,0x11a,0x119, /* sqrt(1/1a0)..sqrt(1/1a7) */
	0x119,0x119,0x118,0x118,0x118,0x118,0x117,0x117, /* sqrt(1/1a8)..sqrt(1/1af) */
	0x117,0x116,0x116,0x116,0x115,0x115,0x115,0x114, /* sqrt(1/1b0)..sqrt(1/1b7) */
	0x114,0x114,0x113,0x113,0x113,0x112,0x112,0x112, /* sqrt(1/1b8)..sqrt(1/1bf) */
	0x112,0x111,0x111,0x111,0x110,0x110,0x110,0x10f, /* sqrt(1/1c0)..sqrt(1/1c7) */
	0x10f,0x10f,0x10f,0x10e,0x10e,0x10e,0x10d,0x10d, /* sqrt(1/1c8)..sqrt(1/1cf) */
	0x10d,0x10c,0x10c,0x10c,0x10c,0x10b,0x10b,0x10b, /* sqrt(1/1d0)..sqrt(1/1d7) */
	0x10a,0x10a,0x10a,0x10a,0x109,0x109,0x109,0x109, /* sqrt(1/1d8)..sqrt(1/1df) */
	0x108,0x108,0x108,0x107,0x107,0x107,0x107,0x106, /* sqrt(1/1e0)..sqrt(1/1e7) */
	0x106,0x106,0x106,0x105,0x105,0x105,0x104,0x104, /* sqrt(1/1e8)..sqrt(1/1ef) */
	0x104,0x104,0x103,0x103,0x103,0x103,0x102,0x102, /* sqrt(1/1f0)..sqrt(1/1f7) */
	0x102,0x102,0x101,0x101,0x101,0x101,0x100,0x100 /* sqrt(1/1f8)..sqrt(1/1ff) */
	};

	/* Compute s = floor(sqrt(a0)), and rp = a0 - s^2. /

	#if GMP_NUMB_BITS > 32
	#define MAGIC 0x10000000000 /* 0xffe7debbfc < MAGIC < 0x232b1850f410 */
	#else
	#define MAGIC 0x100000 /* 0xfee6f < MAGIC < 0x29cbc8 */
	#endif

	static mp_limb_t
	mpn_sqrtrem1 (mp_ptr rp, mp_limb_t a0)
	{
	#if GMP_NUMB_BITS > 32
	mp_limb_t a1;
	#endif
	mp_limb_t x0, t2, t, x2;
	unsigned abits;

	ASSERT_ALWAYS (GMP_NAIL_BITS == 0);
	ASSERT_ALWAYS (GMP_LIMB_BITS == 32 \|\| GMP_LIMB_BITS == 64);
	ASSERT (a0 >= GMP_NUMB_HIGHBIT / 2);

	/* Use Newton iterations for approximating 1/sqrt(a) instead of sqrt(a),
	since we can do the former without division. As part of the last
	iteration convert from 1/sqrt(a) to sqrt(a). */

	abits = a0 >> (GMP_LIMB_BITS - 1 - 8); /* extract bits for table lookup */
	x0 = invsqrttab[abits - 0x80]; /* initial 1/sqrt(a) */

	/* x0 is now an 8 bits approximation of 1/sqrt(a0) */

	#if GMP_NUMB_BITS > 32
	a1 = a0 >> (GMP_LIMB_BITS - 1 - 32);
	t = (mp_limb_signed_t) (0x2000000000000l - 0x30000 - a1 * x0 * x0) >> 16;
	x0 = (x0 << 16) + ((mp_limb_signed_t) (x0 * t) >> (16+2));

	/* x0 is now an 16 bits approximation of 1/sqrt(a0) */

	t2 = x0 * (a0 >> (32-8));
	t = t2 >> 25;
	t = ((mp_limb_signed_t) ((a0 << 14) - t * t - MAGIC) >> (32-8));
	x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 15);
	x0 >>= 32;
	#else
	t2 = x0 * (a0 >> (16-8));
	t = t2 >> 13;
	t = ((mp_limb_signed_t) ((a0 << 6) - t * t - MAGIC) >> (16-8));
	x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 7);
	x0 >>= 16;
	#endif

	/* x0 is now a full limb approximation of sqrt(a0) */

	x2 = x0 * x0;
	if (x2 + 2*x0 <= a0 - 1)
	{
	x2 += 2*x0 + 1;
	x0++;
	}

	*rp = a0 - x2;
	return x0;
	}


	#define Prec (GMP_NUMB_BITS >> 1)

	/* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized
	return cc such that {np, 2} = sp[0]^2 + cc2^GMP_NUMB_BITS + rp[0] /
	static mp_limb_t
	mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
	{
	mp_limb_t qhl, q, u, np0, sp0, rp0, q2;
	int cc;

	ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);

	np0 = np[0];
	sp0 = mpn_sqrtrem1 (rp, np[1]);
	qhl = 0;
	rp0 = rp[0];
	while (rp0 >= sp0)
	{
	qhl++;
	rp0 -= sp0;
	}
	/* now rp0 < sp0 < 2^Prec */
	rp0 = (rp0 << Prec) + (np0 >> Prec);
	u = 2 * sp0;
	q = rp0 / u;
	u = rp0 - q * u;
	q += (qhl & 1) << (Prec - 1);
	qhl >>= 1; /* if qhl=1, necessary q=0 as qhl2^Prec + q <= 2^Prec /
	/* now we have (initial rp0)<<Prec + np0>>Prec = (qhl<<Prec + q) * (2sp0) + u */
	sp0 = ((sp0 + qhl) << Prec) + q;
	cc = u >> Prec;
	rp0 = ((u << Prec) & GMP_NUMB_MASK) + (np0 & (((mp_limb_t) 1 << Prec) - 1));
	/* subtract q * q or qhl2^(2Prec) from rp */
	q2 = q * q;
	cc -= (rp0 < q2) + qhl;
	rp0 -= q2;
	/* now subtract 2q2^Prec + 2^(2Prec) if qhl is set /
	if (cc < 0)
	{
	if (sp0 != 0)
	{
	rp0 += sp0;
	cc += rp0 < sp0;
	}
	else
	cc++;
	--sp0;
	rp0 += sp0;
	cc += rp0 < sp0;
	}

	rp[0] = rp0;
	sp[0] = sp0;
	return cc;
	}

	/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},
	and in {np, n} the low n limbs of the remainder, returns the high
	limb of the remainder (which is 0 or 1).
	Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4
	where B=2^GMP_NUMB_BITS. */
	static mp_limb_t
	mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n)
	{
	mp_limb_t q; /* carry out of {sp, n} */
	int c, b; /* carry out of remainder */
	mp_size_t l, h;

	ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);

	if (n == 1)
	c = mpn_sqrtrem2 (sp, np, np);
	else
	{
	l = n / 2;
	h = n - l;
	q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h);
	if (q != 0)
	mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h);
	q += mpn_divrem (sp, 0, np + l, n, sp + l, h);
	c = sp[0] & 1;
	mpn_rshift (sp, sp, l, 1);
	sp[l - 1] \|= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;
	q >>= 1;
	if (c != 0)
	c = mpn_add_n (np + l, np + l, sp + l, h);
	mpn_sqr_n (np + n, sp, l);
	b = q + mpn_sub_n (np, np, np + n, 2 * l);
	c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, (mp_limb_t) b);
	q = mpn_add_1 (sp + l, sp + l, h, q);

	if (c < 0)
	{
	c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;
	c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));
	q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));
	}
	}

	return c;
	}


	mp_size_t
	mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)
	{
	mp_limb_t *tp, s0[1], cc, high, rl;
	int c;
	mp_size_t rn, tn;
	TMP_DECL;

	ASSERT (nn >= 0);
	ASSERT_MPN (np, nn);

	/* If OP is zero, both results are zero. */
	if (nn == 0)
	return 0;

	ASSERT (np[nn - 1] != 0);
	ASSERT (rp == NULL \|\| MPN_SAME_OR_SEPARATE_P (np, rp, nn));
	ASSERT (rp == NULL \|\| ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));
	ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));

	high = np[nn - 1];
	if (nn == 1 && (high & GMP_NUMB_HIGHBIT))
	{
	mp_limb_t r;
	sp[0] = mpn_sqrtrem1 (&r, high);
	if (rp != NULL)
	rp[0] = r;
	return r != 0;
	}
	count_leading_zeros (c, high);
	c -= GMP_NAIL_BITS;

	c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */
	tn = (nn + 1) / 2; /* 2tn is the smallest even integer >= nn /

	TMP_MARK;
	if (nn % 2 != 0 \|\| c > 0)
	{
	tp = TMP_ALLOC_LIMBS (2 * tn);
	tp[0] = 0; /* needed only when 2tn > nn, but saves a test /
	if (c != 0)
	mpn_lshift (tp + 2 * tn - nn, np, nn, 2 * c);
	else
	MPN_COPY (tp + 2 * tn - nn, np, nn);
	rl = mpn_dc_sqrtrem (sp, tp, tn);
	/* We have 2^(2k)N = S^2 + R where k = c + (2tn-nn)GMP_NUMB_BITS/2,
	thus 2^(2k)N = (S-s0)^2 + 2Ss0 - s0^2 + R where s0=S mod 2^k /
	c += (nn % 2) * GMP_NUMB_BITS / 2; /* c now represents k */
	s0[0] = sp[0] & (((mp_limb_t) 1 << c) - 1); /* S mod 2^k */
	rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]); /* R = R + 2s0S */
	cc = mpn_submul_1 (tp, s0, 1, s0[0]);
	rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;
	mpn_rshift (sp, sp, tn, c);
	tp[tn] = rl;
	if (rp == NULL)
	rp = tp;
	c = c << 1;
	if (c < GMP_NUMB_BITS)
	tn++;
	else
	{
	tp++;
	c -= GMP_NUMB_BITS;
	}
	if (c != 0)
	mpn_rshift (rp, tp, tn, c);
	else
	MPN_COPY_INCR (rp, tp, tn);
	rn = tn;
	}
	else
	{
	if (rp == NULL)
	rp = TMP_ALLOC_LIMBS (nn);
	if (rp != np)
	MPN_COPY (rp, np, nn);
	rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn));
	}

	MPN_NORMALIZE (rp, rn);

	TMP_FREE;
	return rn;
	}