| /* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and |
| store the truncated integer part at rootp and the remainder at remp. |
| |
| Contributed by Paul Zimmermann (algorithm) and |
| Paul Zimmermann and Torbjorn Granlund (implementation). |
| |
| THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES. IT'S |
| ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT'S ALMOST |
| GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
| |
| Copyright 2002, 2005, 2009 Free Software Foundation, Inc. |
| |
| This file is part of the GNU MP Library. |
| |
| The GNU MP Library is free software; you can redistribute it and/or modify |
| it under the terms of the GNU Lesser General Public License as published by |
| the Free Software Foundation; either version 3 of the License, or (at your |
| option) any later version. |
| |
| The GNU MP Library is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
| License for more details. |
| |
| You should have received a copy of the GNU Lesser General Public License |
| along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ |
| |
| /* FIXME: |
| (a) Once there is a native mpn_tdiv_q function in GMP (division without |
| remainder), replace the quick-and-dirty implementation below by it. |
| (b) The implementation below is not optimal when remp == NULL, since the |
| complexity is M(n) where n is the input size, whereas it should be |
| only M(n/k) on average. |
| */ |
| |
| #include <stdio.h> /* for NULL */ |
| |
| #include "gmp.h" |
| #include "gmp-impl.h" |
| #include "longlong.h" |
| |
| static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, |
| mp_limb_t, int); |
| static void mpn_tdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, |
| mp_srcptr, mp_size_t); |
| |
| #define MPN_RSHIFT(cy,rp,up,un,cnt) \ |
| do { \ |
| if ((cnt) != 0) \ |
| cy = mpn_rshift (rp, up, un, cnt); \ |
| else \ |
| { \ |
| MPN_COPY_INCR (rp, up, un); \ |
| cy = 0; \ |
| } \ |
| } while (0) |
| |
| #define MPN_LSHIFT(cy,rp,up,un,cnt) \ |
| do { \ |
| if ((cnt) != 0) \ |
| cy = mpn_lshift (rp, up, un, cnt); \ |
| else \ |
| { \ |
| MPN_COPY_DECR (rp, up, un); \ |
| cy = 0; \ |
| } \ |
| } while (0) |
| |
| |
| /* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero. |
| If remp <> NULL, put in {remp, un} the remainder. |
| Return the size (in limbs) of the remainder if remp <> NULL, |
| or a non-zero value iff the remainder is non-zero when remp = NULL. |
| Assumes: |
| (a) up[un-1] is not zero |
| (b) rootp has at least space for ceil(un/k) limbs |
| (c) remp has at least space for un limbs (in case remp <> NULL) |
| (d) the operands do not overlap. |
| |
| The auxiliary memory usage is 3*un+2 if remp = NULL, |
| and 2*un+2 if remp <> NULL. FIXME: This is an incorrect comment. |
| */ |
| mp_size_t |
| mpn_rootrem (mp_ptr rootp, mp_ptr remp, |
| mp_srcptr up, mp_size_t un, mp_limb_t k) |
| { |
| ASSERT (un > 0); |
| ASSERT (up[un - 1] != 0); |
| ASSERT (k > 1); |
| |
| if ((remp == NULL) && (un / k > 2)) |
| /* call mpn_rootrem recursively, padding {up,un} with k zero limbs, |
| which will produce an approximate root with one more limb, |
| so that in most cases we can conclude. */ |
| { |
| mp_ptr sp, wp; |
| mp_size_t rn, sn, wn; |
| TMP_DECL; |
| TMP_MARK; |
| wn = un + k; |
| wp = TMP_ALLOC_LIMBS (wn); /* will contain the padded input */ |
| sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */ |
| sp = TMP_ALLOC_LIMBS (sn); /* approximate root of padded input */ |
| MPN_COPY (wp + k, up, un); |
| MPN_ZERO (wp, k); |
| rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1); |
| /* the approximate root S = {sp,sn} is either the correct root of |
| {sp,sn}, or one too large. Thus unless the least significant limb |
| of S is 0 or 1, we can deduce the root of {up,un} is S truncated by |
| one limb. (In case sp[0]=1, we can deduce the root, but not decide |
| whether it is exact or not.) */ |
| MPN_COPY (rootp, sp + 1, sn - 1); |
| TMP_FREE; |
| return rn; |
| } |
| else /* remp <> NULL */ |
| { |
| return mpn_rootrem_internal (rootp, remp, up, un, k, 0); |
| } |
| } |
| |
| /* if approx is non-zero, does not compute the final remainder */ |
| static mp_size_t |
| mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un, |
| mp_limb_t k, int approx) |
| { |
| mp_ptr qp, rp, sp, wp; |
| mp_size_t qn, rn, sn, wn, nl, bn; |
| mp_limb_t save, save2, cy; |
| unsigned long int unb; /* number of significant bits of {up,un} */ |
| unsigned long int xnb; /* number of significant bits of the result */ |
| unsigned int cnt; |
| unsigned long b, kk; |
| unsigned long sizes[GMP_NUMB_BITS + 1]; |
| int ni, i; |
| int c; |
| int logk; |
| TMP_DECL; |
| |
| TMP_MARK; |
| |
| /* qp and wp need enough space to store S'^k where S' is an approximate |
| root. Since S' can be as large as S+2, the worst case is when S=2 and |
| S'=4. But then since we know the number of bits of S in advance, S' |
| can only be 3 at most. Similarly for S=4, then S' can be 6 at most. |
| So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k |
| fits in un limbs, the number of extra limbs needed is bounded by |
| ceil(k*log2(3/2)/GMP_NUMB_BITS). */ |
| #define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS) |
| qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder |
| of R/(k*S^(k-1)), and S^k */ |
| if (remp == NULL) |
| rp = TMP_ALLOC_LIMBS (un); /* will contain the remainder */ |
| else |
| rp = remp; |
| sp = rootp; |
| wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1), |
| and temporary for mpn_pow_1 */ |
| count_leading_zeros (cnt, up[un - 1]); |
| unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS; |
| /* unb is the number of bits of the input U */ |
| |
| xnb = (unb - 1) / k + 1; /* ceil (unb / k) */ |
| /* xnb is the number of bits of the root R */ |
| |
| if (xnb == 1) /* root is 1 */ |
| { |
| if (remp == NULL) |
| remp = rp; |
| mpn_sub_1 (remp, up, un, (mp_limb_t) 1); |
| MPN_NORMALIZE (remp, un); /* There should be at most one zero limb, |
| if we demand u to be normalized */ |
| rootp[0] = 1; |
| TMP_FREE; |
| return un; |
| } |
| |
| /* We initialize the algorithm with a 1-bit approximation to zero: since we |
| know the root has exactly xnb bits, we write r0 = 2^(xnb-1), so that |
| r0^k = 2^(k*(xnb-1)), that we subtract to the input. */ |
| kk = k * (xnb - 1); /* number of truncated bits in the input */ |
| rn = un - kk / GMP_NUMB_BITS; /* number of limbs of the non-truncated part */ |
| MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS); |
| mpn_sub_1 (rp, rp, rn, 1); /* subtract the initial approximation: since |
| the non-truncated part is less than 2^k, it |
| is <= k bits: rn <= ceil(k/GMP_NUMB_BITS) */ |
| sp[0] = 1; /* initial approximation */ |
| sn = 1; /* it has one limb */ |
| |
| for (logk = 1; ((k - 1) >> logk) != 0; logk++) |
| ; |
| /* logk = ceil(log(k)/log(2)) */ |
| |
| b = xnb - 1; /* number of remaining bits to determine in the kth root */ |
| ni = 0; |
| while (b != 0) |
| { |
| /* invariant: here we want b+1 total bits for the kth root */ |
| sizes[ni] = b; |
| /* if c is the new value of b, this means that we'll go from a root |
| of c+1 bits (say s') to a root of b+1 bits. |
| It is proved in the book "Modern Computer Arithmetic" from Brent |
| and Zimmermann, Chapter 1, that |
| if s' >= k*beta, then at most one correction is necessary. |
| Here beta = 2^(b-c), and s' >= 2^c, thus it suffices that |
| c >= ceil((b + log2(k))/2). */ |
| b = (b + logk + 1) / 2; |
| if (b >= sizes[ni]) |
| b = sizes[ni] - 1; /* add just one bit at a time */ |
| ni++; |
| } |
| sizes[ni] = 0; |
| ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1); |
| /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with |
| sizes[i] <= 2 * sizes[i+1]. |
| Newton iteration will first compute sizes[ni-1] extra bits, |
| then sizes[ni-2], ..., then sizes[0] = b. */ |
| |
| wp[0] = 1; /* {sp,sn}^(k-1) = 1 */ |
| wn = 1; |
| for (i = ni; i != 0; i--) |
| { |
| /* 1: loop invariant: |
| {sp, sn} is the current approximation of the root, which has |
| exactly 1 + sizes[ni] bits. |
| {rp, rn} is the current remainder |
| {wp, wn} = {sp, sn}^(k-1) |
| kk = number of truncated bits of the input |
| */ |
| b = sizes[i - 1] - sizes[i]; /* number of bits to compute in that |
| iteration */ |
| |
| /* Reinsert a low zero limb if we normalized away the entire remainder */ |
| if (rn == 0) |
| { |
| rp[0] = 0; |
| rn = 1; |
| } |
| |
| /* first multiply the remainder by 2^b */ |
| MPN_LSHIFT (cy, rp + b / GMP_NUMB_BITS, rp, rn, b % GMP_NUMB_BITS); |
| rn = rn + b / GMP_NUMB_BITS; |
| if (cy != 0) |
| { |
| rp[rn] = cy; |
| rn++; |
| } |
| |
| kk = kk - b; |
| |
| /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */ |
| |
| /* Now insert bits [kk,kk+b-1] from the input U */ |
| bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[] */ |
| save = rp[bn]; |
| /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */ |
| nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS); |
| /* nl = 1 + floor((kk + b - 1) / GMP_NUMB_BITS) |
| - floor(kk / GMP_NUMB_BITS) |
| <= 1 + (kk + b - 1) / GMP_NUMB_BITS |
| - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS |
| = 2 + (b - 2) / GMP_NUMB_BITS |
| thus since nl is an integer: |
| nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */ |
| /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */ |
| if (nl - 1 > bn) |
| save2 = rp[bn + 1]; |
| MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS); |
| /* set to zero high bits of rp[bn] */ |
| rp[bn] &= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS)) - 1; |
| /* restore corresponding bits */ |
| rp[bn] |= save; |
| if (nl - 1 > bn) |
| rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since |
| they start by bit 0 in rp[0], so they use |
| at most ceil(b/GMP_NUMB_BITS) limbs */ |
| |
| /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */ |
| |
| /* compute {wp, wn} = k * {sp, sn}^(k-1) */ |
| cy = mpn_mul_1 (wp, wp, wn, k); |
| wp[wn] = cy; |
| wn += cy != 0; |
| |
| /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */ |
| |
| /* now divide {rp, rn} by {wp, wn} to get the low part of the root */ |
| if (rn < wn) |
| { |
| qn = 0; |
| } |
| else |
| { |
| mp_ptr tp; |
| qn = rn - wn; /* expected quotient size */ |
| /* tp must have space for wn limbs. |
| The quotient needs rn-wn+1 limbs, thus quotient+remainder |
| need altogether rn+1 limbs. */ |
| tp = qp + qn + 1; /* put remainder in Q buffer */ |
| mpn_tdiv_q (qp, tp, 0, rp, rn, wp, wn); |
| qn += qp[qn] != 0; |
| } |
| |
| /* 5: current buffers: {sp,sn}, {qp,qn}. |
| Note: {rp,rn} is not needed any more since we'll compute it from |
| scratch at the end of the loop. |
| */ |
| |
| /* Number of limbs used by b bits, when least significant bit is |
| aligned to least limb */ |
| bn = (b - 1) / GMP_NUMB_BITS + 1; |
| |
| /* the quotient should be smaller than 2^b, since the previous |
| approximation was correctly rounded toward zero */ |
| if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) && |
| qp[qn - 1] >= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS)))) |
| { |
| qn = b / GMP_NUMB_BITS + 1; /* b+1 bits */ |
| MPN_ZERO (qp, qn); |
| qp[qn - 1] = (mp_limb_t) 1 << (b % GMP_NUMB_BITS); |
| MPN_DECR_U (qp, qn, 1); |
| qn -= qp[qn - 1] == 0; |
| } |
| |
| /* 6: current buffers: {sp,sn}, {qp,qn} */ |
| |
| /* multiply the root approximation by 2^b */ |
| MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS); |
| sn = sn + b / GMP_NUMB_BITS; |
| if (cy != 0) |
| { |
| sp[sn] = cy; |
| sn++; |
| } |
| |
| /* 7: current buffers: {sp,sn}, {qp,qn} */ |
| |
| ASSERT_ALWAYS (bn >= qn); /* this is ok since in the case qn > bn |
| above, q is set to 2^b-1, which has |
| exactly bn limbs */ |
| |
| /* Combine sB and q to form sB + q. */ |
| save = sp[b / GMP_NUMB_BITS]; |
| MPN_COPY (sp, qp, qn); |
| MPN_ZERO (sp + qn, bn - qn); |
| sp[b / GMP_NUMB_BITS] |= save; |
| |
| /* 8: current buffer: {sp,sn} */ |
| |
| /* Since each iteration treats b bits from the root and thus k*b bits |
| from the input, and we already considered b bits from the input, |
| we now have to take another (k-1)*b bits from the input. */ |
| kk -= (k - 1) * b; /* remaining input bits */ |
| /* {rp, rn} = floor({up, un} / 2^kk) */ |
| MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS, kk % GMP_NUMB_BITS); |
| rn = un - kk / GMP_NUMB_BITS; |
| rn -= rp[rn - 1] == 0; |
| |
| /* 9: current buffers: {sp,sn}, {rp,rn} */ |
| |
| for (c = 0;; c++) |
| { |
| /* Compute S^k in {qp,qn}. */ |
| if (i == 1) |
| { |
| /* Last iteration: we don't need W anymore. */ |
| /* mpn_pow_1 requires that both qp and wp have enough space to |
| store the result {sp,sn}^k + 1 limb */ |
| approx = approx && (sp[0] > 1); |
| qn = (approx == 0) ? mpn_pow_1 (qp, sp, sn, k, wp) : 0; |
| } |
| else |
| { |
| /* W <- S^(k-1) for the next iteration, |
| and S^k = W * S. */ |
| wn = mpn_pow_1 (wp, sp, sn, k - 1, qp); |
| mpn_mul (qp, wp, wn, sp, sn); |
| qn = wn + sn; |
| qn -= qp[qn - 1] == 0; |
| } |
| |
| /* if S^k > floor(U/2^kk), the root approximation was too large */ |
| if (qn > rn || (qn == rn && mpn_cmp (qp, rp, rn) > 0)) |
| MPN_DECR_U (sp, sn, 1); |
| else |
| break; |
| } |
| |
| /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */ |
| |
| ASSERT_ALWAYS (c <= 1); |
| ASSERT_ALWAYS (rn >= qn); |
| |
| /* R = R - Q = floor(U/2^kk) - S^k */ |
| if ((i > 1) || (approx == 0)) |
| { |
| mpn_sub (rp, rp, rn, qp, qn); |
| MPN_NORMALIZE (rp, rn); |
| } |
| /* otherwise we have rn > 0, thus the return value is ok */ |
| |
| /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */ |
| } |
| |
| TMP_FREE; |
| return rn; |
| } |
| |
| /* return the quotient Q = {np, nn} divided by {dp, dn} only */ |
| static void |
| mpn_tdiv_q (mp_ptr qp, mp_ptr rp, mp_size_t qxn, mp_srcptr np, mp_size_t nn, |
| mp_srcptr dp, mp_size_t dn) |
| { |
| mp_size_t qn = nn - dn; /* expected quotient size is qn+1 */ |
| mp_size_t cut; |
| |
| ASSERT_ALWAYS (qxn == 0); |
| if (dn <= qn + 3) |
| { |
| mpn_tdiv_qr (qp, rp, 0, np, nn, dp, dn); |
| } |
| else |
| { |
| mp_ptr tp; |
| TMP_DECL; |
| TMP_MARK; |
| tp = TMP_ALLOC_LIMBS (qn + 2); |
| cut = dn - (qn + 3); |
| /* perform a first division with divisor cut to dn-cut=qn+3 limbs |
| and dividend to nn-(cut-1) limbs, i.e. the quotient will be one |
| limb more than the final quotient. |
| The quotient will have qn+2 < dn-cut limbs, |
| and the remainder dn-cut = qn+3 limbs. */ |
| mpn_tdiv_qr (tp, rp, 0, np + cut - 1, nn - cut + 1, dp + cut, dn - cut); |
| /* let Q' be the quotient of B * {np, nn} by {dp, dn} [qn+2 limbs] |
| and T be the approximation of Q' computed above, where |
| B = 2^GMP_NUMB_BITS. |
| We have Q' <= T <= Q'+1, and since floor(Q'/B) = Q, we have |
| Q = floor(T/B), unless the last limb of T only consists of zeroes. */ |
| if (tp[0] != 0) |
| { |
| /* simply truncate one limb of T */ |
| MPN_COPY (qp, tp + 1, qn + 1); |
| } |
| else /* too bad: perform the expensive division */ |
| { |
| mpn_tdiv_qr (qp, rp, 0, np, nn, dp, dn); |
| } |
| TMP_FREE; |
| } |
| } |