/* mpn_mu_bdiv_q(qp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^nn,
   storing the result in {qp,nn}.  Overlap allowed between Q and N; all other
   overlap disallowed.
| |
| Contributed to the GNU project by Torbjorn Granlund. |
| |
| THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS |
| ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS |
| ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP |
| RELEASE. |
| |
| Copyright 2005, 2006, 2007 Free Software Foundation, Inc. |
| |
| This file is part of the GNU MP Library. |
| |
| The GNU MP Library is free software; you can redistribute it and/or modify |
| it under the terms of the GNU Lesser General Public License as published by |
| the Free Software Foundation; either version 3 of the License, or (at your |
| option) any later version. |
| |
| The GNU MP Library is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
| License for more details. |
| |
| You should have received a copy of the GNU Lesser General Public License |
| along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ |
| |
| |
| /* We use the "misunderstanding algorithm" (MU), discovered by Paul Zimmermann |
| and Torbjorn Granlund when Torbjorn misunderstood Paul's explanation of |
| Jebelean's bidirectional exact division algorithm. |
| |
| The idea of this algorithm is to compute a smaller inverted value than used |
| in the standard Barrett algorithm, and thus save time in the Newton |
| iterations, and pay just a small price when using the inverted value for |
| developing quotient bits. |
| |
| Written by Torbjorn Granlund. Paul Zimmermann suggested the use of the |
| "wrap around" trick. |
| */ |
| |
| #include "gmp.h" |
| #include "gmp-impl.h" |
| |
| |
| /* N = {np,nn} |
| D = {dp,dn} |
| |
| Requirements: N >= D |
| D >= 1 |
| N mod D = 0 |
| D odd |
| dn >= 2 |
| nn >= 2 |
		scratch space as determined by mpn_mu_bdiv_q_itch(nn,dn).
| |
| Write quotient to Q = {qp,nn}. |
| |
| FIXME: When iterating, perhaps do the small step before loop, not after. |
| FIXME: Try to avoid the scalar divisions when computing inverse size. |
| FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible. In |
| particular, when dn==in, tp and rp could use the same space. |
| FIXME: Trim final quotient calculation to qn limbs of precision. |
| */ |
void
mpn_mu_bdiv_q (mp_ptr qp,
	       mp_srcptr np, mp_size_t nn,
	       mp_srcptr dp, mp_size_t dn,
	       mp_ptr scratch)
{
  mp_ptr ip;			/* inverse of D mod B^in */
  mp_ptr rp;			/* partial remainder */
  mp_size_t qn;			/* quotient limbs still to be produced */
  mp_size_t in;			/* inverse size = quotient block size */

  qn = nn;			/* quotient is as long as the dividend */

  ASSERT (dn >= 2);
  ASSERT (qn >= 2);

  if (qn > dn)
    {
      mp_size_t b;		/* number of quotient blocks */
      mp_ptr tp;		/* product scratch */
      mp_limb_t cy;		/* borrow carried from one block step to the next */
      int k;			/* FFT parameters; valid only when the same */
      mp_size_t m, wn;		/* threshold test succeeds below */
      mp_size_t i;

      /* |_______________________|   dividend
			|________|   divisor  */

      /* Compute an inverse size that is a nice partition of the quotient.  */
      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */

      /* Some notes on allocation:

	 When in = dn, R dies when mpn_mullow returns, if in < dn the low in
	 limbs of R die at that point.  We could save memory by letting T live
	 just under R, and let the upper part of T expand into R.  These changes
	 should reduce itch to perhaps 3dn.
       */

      ip = scratch;			/* in limbs */
      rp = scratch + in;		/* dn limbs */
      tp = scratch + in + dn;		/* dn + in limbs FIXME: mpn_fft_next_size */
      scratch += in;			/* Roughly 2in+1 limbs */
      /* NOTE(review): scratch now overlaps rp and tp; this is safe only
	 because mpn_binvert is finished with it before rp/tp are first
	 written.  */

      mpn_binvert (ip, dp, in, scratch);

      cy = 0;

      /* Load the low dn dividend limbs as the initial partial remainder and
	 develop the first quotient block: Q0 = R * D^{-1} mod B^in.  */
      MPN_COPY (rp, np, dn);
      np += dn;
      mpn_mullow_n (qp, rp, ip, in);
      qn -= in;

      /* Precompute the wrap-around FFT parameters once; k, m, wn are read
	 only where the identical threshold test succeeds below.  */
      if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
	{
	  k = mpn_fft_best_k (dn, 0);
	  m = mpn_fft_next_size (dn, k);
	  wn = dn + in - m;		/* number of wrapped limbs */
	  ASSERT_ALWAYS (wn >= 0);	/* could handle this below */
	}

      /* Main loop: subtract Q_i * D from the partial remainder, then compute
	 the next in quotient limbs from the updated remainder.  */
      while (qn > in)
	{
#if WANT_FFT
	  if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
	    {
	      /* The two multiplicands are dn and 'in' limbs, with dn >= in.
		 The relevant part of the result will typically partially wrap,
		 and that part will come out as subtracted to the right.  The
		 unwrapped part, m-in limbs at the high end of tp, is the lower
		 part of the sought product.  The wrapped part, at the low end
		 of tp, will be subtracted from the low part of the partial
		 remainder; we undo that operation with another subtraction.  */
	      int c0;

	      mpn_mul_fft (tp, m, dp, dn, qp, in, k);

	      /* Undo the wrapped (negatively folded) low wn limbs.  */
	      c0 = mpn_sub_n (tp + m, rp, tp, wn);

	      /* Propagate the resulting borrow through tp[wn..in-1].  */
	      for (i = wn; c0 != 0 && i < in; i++)
		c0 = tp[i] == GMP_NUMB_MASK;
	      mpn_incr_u (tp + in, c0);
	    }
	  else
#endif
	    mpn_mul (tp, dp, dn, qp, in);	/* mulhi, need tp[dn+in-1...in] */
	  qp += in;
	  if (dn != in)
	    {
	      /* Subtract tp[dn-1...in] from partial remainder.  */
	      cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
	      if (cy == 2)
		{
		  /* Fold the double borrow back into the product so that at
		     most a single borrow is carried forward.  */
		  mpn_incr_u (tp + dn, 1);
		  cy = 1;
		}
	    }
	  /* Subtract tp[dn+in-1...dn] from dividend.  */
	  cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
	  np += in;
	  /* Next quotient block: Q_{i+1} = R * D^{-1} mod B^in.  */
	  mpn_mullow_n (qp, rp, ip, in);
	  qn -= in;
	}

      /* Generate last qn limbs.
	 FIXME: It should be possible to limit precision here, since qn is
	 typically somewhat smaller than dn.  No big gains expected.  */
#if WANT_FFT
      if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
	{
	  int c0;

	  /* Same wrap-around product and fixup as in the main loop.  */
	  mpn_mul_fft (tp, m, dp, dn, qp, in, k);

	  c0 = mpn_sub_n (tp + m, rp, tp, wn);

	  for (i = wn; c0 != 0 && i < in; i++)
	    c0 = tp[i] == GMP_NUMB_MASK;
	  mpn_incr_u (tp + in, c0);
	}
      else
#endif
	mpn_mul (tp, dp, dn, qp, in);	/* mulhi, need tp[qn+in-1...in] */
      qp += in;
      if (dn != in)
	{
	  cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
	  if (cy == 2)
	    {
	      mpn_incr_u (tp + dn, 1);
	      cy = 1;
	    }
	}

      /* Only qn - (dn - in) dividend limbs remain to be consumed here.  */
      mpn_sub_nc (rp + dn - in, np, tp + dn, qn - (dn - in), cy);
      /* Final quotient limbs, with mullow precision trimmed to qn.  */
      mpn_mullow_n (qp, rp, ip, qn);
    }
  else
    {
      /* |_______________________|   dividend
		|________________|   divisor  */

      /* Compute half-sized inverse.  */
      in = qn - (qn >> 1);

      ip = scratch;		/* ceil(qn/2) limbs */
      rp = scratch + in;	/* ceil(qn/2)+qn limbs */
      scratch += in;		/* 2*ceil(qn/2)+2 */

      mpn_binvert (ip, dp, in, scratch);

      mpn_mullow_n (qp, np, ip, in);		/* low `in' quotient limbs */
#if WANT_FFT
      if (ABOVE_THRESHOLD (qn, MUL_FFT_MODF_THRESHOLD))
	{
	  int k;
	  mp_size_t m;

	  /* Wrap-around product of the qn divisor limbs by the low quotient
	     block.  NOTE(review): the mpn_cmp test appears to detect that the
	     true product's low part exceeded N's low limbs (i.e. the wrapped
	     part borrowed), compensated by incrementing at rp + in — confirm
	     against mpn_mul_fft's wrap semantics.  */
	  k = mpn_fft_best_k (qn, 0);
	  m = mpn_fft_next_size (qn, k);
	  mpn_mul_fft (rp, m, dp, qn, qp, in, k);
	  if (mpn_cmp (np, rp, in) < 0)
	    mpn_incr_u (rp + in, 1);
	}
      else
#endif
	mpn_mul (rp, dp, qn, qp, in);		/* mulhigh */

      /* R = high part of N - D*Q0; then the high quotient limbs are
	 Q1 = R * D^{-1} mod B^(qn-in).  */
      mpn_sub_n (rp, np + in, rp + in, qn - in);
      mpn_mullow_n (qp + in, rp, ip, qn - in);	/* high qn-in quotient limbs */
    }
}
| |
| mp_size_t |
| mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn) |
| { |
| mp_size_t qn; |
| |
| qn = nn; |
| |
| if (qn > dn) |
| { |
| return 4 * dn; /* FIXME FIXME FIXME need mpn_fft_next_size */ |
| } |
| else |
| { |
| return 2 * qn + 1 + 2; /* FIXME FIXME FIXME need mpn_fft_next_size */ |
| } |
| } |