blob: 24d4dcdb6f89142f2af1f5d1eed51224c3da0ac0 [file] [log] [blame]
/* Compute {up,n}^(-1) mod 2^(n*GMP_NUMB_BITS).
Contributed to the GNU project by Torbjorn Granlund.
THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
RELEASE.
Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
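/*
Newton iteration for the inverse r of u modulo a power of 2; each step
doubles the number of correct low bits of r:
r[k+1] = r[k] - r[k] * (u*r[k] - 1)
r[k+1] = r[k] + r[k] - r[k]*(u*r[k])
*/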
/* LOG2C(n) counts how many of the powers of two 2^0 .. 2^15 are <= n.  That
   is floor(log2(n)) + 1 for 1 <= n < 2^16, 0 for n < 1, and 16 for anything
   larger.  It is meant for constant THRESHOLD arguments only, so that the
   compiler can fold the whole sum into a single constant.  The argument is
   expanded sixteen times, so it must be free of side effects.  */
#define LOG2C(n)                                                        \
  (  ((n) >= (1 <<  0)) + ((n) >= (1 <<  1))                            \
   + ((n) >= (1 <<  2)) + ((n) >= (1 <<  3))                            \
   + ((n) >= (1 <<  4)) + ((n) >= (1 <<  5))                            \
   + ((n) >= (1 <<  6)) + ((n) >= (1 <<  7))                            \
   + ((n) >= (1 <<  8)) + ((n) >= (1 <<  9))                            \
   + ((n) >= (1 << 10)) + ((n) >= (1 << 11))                            \
   + ((n) >= (1 << 12)) + ((n) >= (1 << 13))                            \
   + ((n) >= (1 << 14)) + ((n) >= (1 << 15)))
/* NPOWS is the number of entries in the sizes[] array of mpn_binvert, which
   records one operand size per Newton step.  The starting point is the bit
   width of mp_size_t, capped at 48 when mp_size_t is wider than 6 bytes.
   Outside tune builds, LOG2C (BINV_NEWTON_THRESHOLD) entries are subtracted
   (presumably because the size halving in mpn_binvert stops at that
   threshold, so those low levels are never recorded).  Tune builds keep the
   full count; NOTE(review): presumably because the threshold is not a
   compile-time constant there -- confirm.  */
#if ! TUNE_PROGRAM_BUILD
#define NPOWS                                                           \
  ((sizeof (mp_size_t) <= 6 ? 8 * sizeof (mp_size_t) : 48)              \
   - LOG2C (BINV_NEWTON_THRESHOLD))
#else
#define NPOWS                                                           \
  (sizeof (mp_size_t) <= 6 ? 8 * sizeof (mp_size_t) : 48)
#endif
/* Return a limb count for an n-limb operand; presumably this is the size
   of the 'scratch' area that mpn_binvert needs for the same n.

   With WANT_FFT enabled and n at or above twice MUL_FFT_MODF_THRESHOLD, the
   result is the FFT size that mpn_fft_next_size picks for n using the k
   chosen by mpn_fft_best_k.  Otherwise it is 3 * ceil(n/2) limbs.  */
mp_size_t
mpn_binvert_itch (mp_size_t n)
{
#if WANT_FFT
  /* Large operands: size the area for an FFT product of n limbs.  */
  if (ABOVE_THRESHOLD (n, 2 * MUL_FFT_MODF_THRESHOLD))
    return mpn_fft_next_size (n, mpn_fft_best_k (n, 0));
#endif

  /* n - (n >> 1) is n/2 rounded up.  */
  return (n - (n >> 1)) * 3;
}
/* Compute the inverse of {up,n} modulo 2^(n*GMP_NUMB_BITS) (see the file
   header) and write the n result limbs to rp.

   rp       destination, n limbs.
   up       operand, n limbs.  An inverse modulo a power of 2 exists only
            for odd values, so up[0] is presumably required to be odd.
   n        operand and result size in limbs.
   scratch  work area; presumably sized by mpn_binvert_itch (n).

   Method: a base-case inverse of the low limbs is computed by dividing 1 by
   {up,rn}, then Newton steps r <- r - r*(u*r - 1) roughly double the number
   of correct limbs until all n limbs are done.  */
void
mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
{
mp_ptr xp;
mp_size_t rn, newrn;
mp_size_t sizes[NPOWS], *sizp;
mp_limb_t di;
/* Compute the computation precisions from highest to lowest, leaving the
base case size in 'rn'. */
/* Each size at or above BINV_NEWTON_THRESHOLD is pushed onto sizes[]; the
next one is half of it, rounded up.  The Newton loop below pops them in
reverse order, i.e. from the smallest precision up to n. */
sizp = sizes;
for (rn = n; ABOVE_THRESHOLD (rn, BINV_NEWTON_THRESHOLD); rn = (rn + 1) >> 1)
*sizp++ = rn;
xp = scratch;
/* Compute a base value using a low-overhead O(n^2) algorithm. FIXME: We
should call some divide-and-conquer lsb division function here for an
operand subrange. */
/* Set {xp,rn} to the value 1, then divide it by {up,rn} so that {rp,rn}
receives the rn-limb inverse.  di is the inverse of the low limb up[0];
the division routines are handed its negation. */
MPN_ZERO (xp, rn);
xp[0] = 1;
binvert_limb (di, up[0]);
if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD))
mpn_sb_bdiv_q (rp, xp, rn, up, rn, -di);
else
mpn_dc_bdiv_q (rp, xp, rn, up, rn, -di);
/* Use Newton iterations to get the desired precision. */
/* On entry to each pass {rp,rn} is the inverse to rn limbs; on exit
{rp,newrn} is the inverse to newrn limbs.  Only limbs rn..newrn-1 of rp
are written, the low rn limbs are already final. */
for (; rn < n; rn = newrn)
{
newrn = *--sizp;
/* Form xp = {up,newrn} * {rp,rn}.  Because {rp,rn} inverts u to rn limbs,
the low rn limbs of the true product are 1,0,...,0; only limbs
rn..newrn-1 of xp are used afterwards. */
#if WANT_FFT
if (ABOVE_THRESHOLD (newrn, 2 * MUL_FFT_MODF_THRESHOLD))
{
int k;
mp_size_t m, i;
k = mpn_fft_best_k (newrn, 0);
m = mpn_fft_next_size (newrn, k);
mpn_mul_fft (xp, m, up, newrn, rp, rn, k);
/* Fix-up after the FFT product: if the low rn limbs of xp hold a value
greater than 1 (some limb above index 0 is nonzero, or xp[0] > 1), add 1
to the limbs rn..newrn-1.  NOTE(review): presumably this undoes a borrow
caused by the product wrapping around in the m-limb FFT result -- confirm
against mpn_mul_fft. */
for (i = rn - 1; i >= 0; i--)
if (xp[i] > (i == 0))
{
mpn_add_1 (xp + rn, xp + rn, newrn - rn, 1);
break;
}
}
else
#endif
mpn_mul (xp, up, newrn, rp, rn);
/* New high limbs of the inverse: the low newrn-rn limbs of
{rp} * {xp+rn}, negated.  This is the r - r*(u*r - 1) step restricted to
the limbs that are still unknown. */
mpn_mullow_n (rp + rn, rp, xp + rn, newrn - rn);
mpn_neg_n (rp + rn, rp + rn, newrn - rn);
}
}