| /* mpfr_exp -- exponential of a floating-point number |
| |
| Copyright 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. |
| Contributed by the Arenaire and Cacao projects, INRIA. |
| |
| This file is part of the GNU MPFR Library. |
| |
| The GNU MPFR Library is free software; you can redistribute it and/or modify |
| it under the terms of the GNU Lesser General Public License as published by |
| the Free Software Foundation; either version 2.1 of the License, or (at your |
| option) any later version. |
| |
| The GNU MPFR Library is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
| License for more details. |
| |
| You should have received a copy of the GNU Lesser General Public License |
| along with the GNU MPFR Library; see the file COPYING.LIB. If not, write to |
| the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, |
| MA 02110-1301, USA. */ |
| |
| #define MPFR_NEED_LONGLONG_H /* for MPFR_MPZ_SIZEINBASE2 */ |
| #include "mpfr-impl.h" |
| |
| /* y <- exp(p/2^r) within 1 ulp, using 2^m terms from the series |
| Assume |p/2^r| < 1. |
| We use the following binary splitting formula: |
| P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise |
| Q(a,b) = a*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise |
| T(a,b) = P(a,b) if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise |
| Then exp(p/2^r) ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough. |
| |
| Since P(a,b) = p^(b-a), and we consider only values of b-a of the form 2^j, |
| we don't need to compute P(), we only precompute p^(2^j) in the ptoj[] array |
| below. |
| |
| Since Q(a,b) is divisible by 2^(r*(b-a-1)), we don't compute the power of |
| two part. |
| */ |
| static void |
| mpfr_exp_rational (mpfr_ptr y, mpz_ptr p, long r, int m, |
| mpz_t *Q, mp_prec_t *mult) |
| { |
| unsigned long n, i, j; |
| mpz_t *S, *ptoj; |
| mp_prec_t *log2_nb_terms; |
| mp_exp_t diff, expo; |
| mp_prec_t precy = MPFR_PREC(y), prec_i_have, prec_ptoj; |
| int k, l; |
| |
| MPFR_ASSERTN ((size_t) m < sizeof (long) * CHAR_BIT - 1); |
| |
| S = Q + (m+1); |
| ptoj = Q + 2*(m+1); /* ptoj[i] = mantissa^(2^i) */ |
| log2_nb_terms = mult + (m+1); |
| |
| /* Normalize p */ |
| MPFR_ASSERTD (mpz_cmp_ui (p, 0) != 0); |
| n = mpz_scan1 (p, 0); /* number of trailing zeros in p */ |
| mpz_tdiv_q_2exp (p, p, n); |
| r -= n; /* since |p/2^r| < 1 and p >= 1, r >= 1 */ |
| |
| /* Set initial var */ |
| mpz_set (ptoj[0], p); |
| for (k = 1; k < m; k++) |
| mpz_mul (ptoj[k], ptoj[k-1], ptoj[k-1]); /* ptoj[k] = p^(2^k) */ |
| mpz_set_ui (Q[0], 1); |
| mpz_set_ui (S[0], 1); |
| k = 0; |
| mult[0] = 0; /* the multiplier P[k]/Q[k] for the remaining terms |
| satisfies P[k]/Q[k] <= 2^(-mult[k]) */ |
| log2_nb_terms[0] = 0; /* log2(#terms) [exact in 1st loop where 2^k] */ |
| prec_i_have = 0; |
| |
| /* Main Loop */ |
| n = 1UL << m; |
| for (i = 1; (prec_i_have < precy) && (i < n); i++) |
| { |
| /* invariant: Q[0]*Q[1]*...*Q[k] equals i! */ |
| k++; |
| log2_nb_terms[k] = 0; /* 1 term */ |
| mpz_set_ui (Q[k], i + 1); |
| mpz_set_ui (S[k], i + 1); |
| j = i + 1; /* we have computed j = i+1 terms so far */ |
| l = 0; |
| while ((j & 1) == 0) /* combine and reduce */ |
| { |
| /* invariant: S[k] corresponds to 2^l consecutive terms */ |
| mpz_mul (S[k], S[k], ptoj[l]); |
| mpz_mul (S[k-1], S[k-1], Q[k]); |
| /* Q[k] corresponds to 2^l consecutive terms too. |
| Since it does not contains the factor 2^(r*2^l), |
| when going from l to l+1 we need to multiply |
| by 2^(r*2^(l+1))/2^(r*2^l) = 2^(r*2^l) */ |
| mpz_mul_2exp (S[k-1], S[k-1], r << l); |
| mpz_add (S[k-1], S[k-1], S[k]); |
| mpz_mul (Q[k-1], Q[k-1], Q[k]); |
| log2_nb_terms[k-1] ++; /* number of terms in S[k-1] |
| is a power of 2 by construction */ |
| MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[k]); |
| MPFR_MPZ_SIZEINBASE2 (prec_ptoj, ptoj[l]); |
| mult[k-1] += prec_i_have + (r << l) - prec_ptoj - 1; |
| prec_i_have = mult[k] = mult[k-1]; |
| /* since mult[k] >= mult[k-1] + nbits(Q[k]), |
| we have Q[0]*...*Q[k] <= 2^mult[k] = 2^prec_i_have */ |
| l ++; |
| j >>= 1; |
| k --; |
| } |
| } |
| |
| /* accumulate all products in S[0] and Q[0]. Warning: contrary to above, |
| here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */ |
| l = 0; /* number of accumulated terms in the right part S[k]/Q[k] */ |
| while (k > 0) |
| { |
| j = log2_nb_terms[k-1]; |
| mpz_mul (S[k], S[k], ptoj[j]); |
| mpz_mul (S[k-1], S[k-1], Q[k]); |
| l += 1 << log2_nb_terms[k]; |
| mpz_mul_2exp (S[k-1], S[k-1], r * l); |
| mpz_add (S[k-1], S[k-1], S[k]); |
| mpz_mul (Q[k-1], Q[k-1], Q[k]); |
| k--; |
| } |
| |
| /* Q[0] now equals i! */ |
| MPFR_MPZ_SIZEINBASE2 (prec_i_have, S[0]); |
| diff = (mp_exp_t) prec_i_have - 2 * (mp_exp_t) precy; |
| expo = diff; |
| if (diff >= 0) |
| mpz_div_2exp (S[0], S[0], diff); |
| else |
| mpz_mul_2exp (S[0], S[0], -diff); |
| |
| MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[0]); |
| diff = (mp_exp_t) prec_i_have - (mp_prec_t) precy; |
| expo -= diff; |
| if (diff > 0) |
| mpz_div_2exp (Q[0], Q[0], diff); |
| else |
| mpz_mul_2exp (Q[0], Q[0], -diff); |
| |
| mpz_tdiv_q (S[0], S[0], Q[0]); |
| mpfr_set_z (y, S[0], GMP_RNDD); |
| MPFR_SET_EXP (y, MPFR_GET_EXP (y) + expo - r * (i - 1) ); |
| } |
| |
| #define shift (BITS_PER_MP_LIMB/2) |
| |
| int |
| mpfr_exp_3 (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode) |
| { |
| mpfr_t t, x_copy, tmp; |
| mpz_t uk; |
| mp_exp_t ttt, shift_x; |
| unsigned long twopoweri; |
| mpz_t *P; |
| mp_prec_t *mult; |
| int i, k, loop; |
| int prec_x; |
| mp_prec_t realprec, Prec; |
| int iter; |
| int inexact = 0; |
| MPFR_SAVE_EXPO_DECL (expo); |
| MPFR_ZIV_DECL (ziv_loop); |
| |
| MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd_mode), |
| ("y[%#R]=%R inexact=%d", y, y, inexact)); |
| |
| MPFR_SAVE_EXPO_MARK (expo); |
| |
| /* decompose x */ |
| /* we first write x = 1.xxxxxxxxxxxxx |
| ----- k bits -- */ |
| prec_x = MPFR_INT_CEIL_LOG2 (MPFR_PREC (x)) - MPFR_LOG2_BITS_PER_MP_LIMB; |
| if (prec_x < 0) |
| prec_x = 0; |
| |
| ttt = MPFR_GET_EXP (x); |
| mpfr_init2 (x_copy, MPFR_PREC(x)); |
| mpfr_set (x_copy, x, GMP_RNDD); |
| |
| /* we shift to get a number less than 1 */ |
| if (ttt > 0) |
| { |
| shift_x = ttt; |
| mpfr_div_2ui (x_copy, x, ttt, GMP_RNDN); |
| ttt = MPFR_GET_EXP (x_copy); |
| } |
| else |
| shift_x = 0; |
| MPFR_ASSERTD (ttt <= 0); |
| |
| /* Init prec and vars */ |
| realprec = MPFR_PREC (y) + MPFR_INT_CEIL_LOG2 (prec_x + MPFR_PREC (y)); |
| Prec = realprec + shift + 2 + shift_x; |
| mpfr_init2 (t, Prec); |
| mpfr_init2 (tmp, Prec); |
| mpz_init (uk); |
| |
| /* Main loop */ |
| MPFR_ZIV_INIT (ziv_loop, realprec); |
| for (;;) |
| { |
| int scaled = 0; |
| MPFR_BLOCK_DECL (flags); |
| |
| k = MPFR_INT_CEIL_LOG2 (Prec) - MPFR_LOG2_BITS_PER_MP_LIMB; |
| |
| /* now we have to extract */ |
| twopoweri = BITS_PER_MP_LIMB; |
| |
| /* Allocate tables */ |
| P = (mpz_t*) (*__gmp_allocate_func) (3*(k+2)*sizeof(mpz_t)); |
| for (i = 0; i < 3*(k+2); i++) |
| mpz_init (P[i]); |
| mult = (mp_prec_t*) (*__gmp_allocate_func) (2*(k+2)*sizeof(mp_prec_t)); |
| |
| /* Particular case for i==0 */ |
| mpfr_extract (uk, x_copy, 0); |
| MPFR_ASSERTD (mpz_cmp_ui (uk, 0) != 0); |
| mpfr_exp_rational (tmp, uk, shift + twopoweri - ttt, k + 1, P, mult); |
| for (loop = 0; loop < shift; loop++) |
| mpfr_sqr (tmp, tmp, GMP_RNDD); |
| twopoweri *= 2; |
| |
| /* General case */ |
| iter = (k <= prec_x) ? k : prec_x; |
| for (i = 1; i <= iter; i++) |
| { |
| mpfr_extract (uk, x_copy, i); |
| if (MPFR_LIKELY (mpz_cmp_ui (uk, 0) != 0)) |
| { |
| mpfr_exp_rational (t, uk, twopoweri - ttt, k - i + 1, P, mult); |
| mpfr_mul (tmp, tmp, t, GMP_RNDD); |
| } |
| MPFR_ASSERTN (twopoweri <= LONG_MAX/2); |
| twopoweri *=2; |
| } |
| |
| /* Clear tables */ |
| for (i = 0; i < 3*(k+2); i++) |
| mpz_clear (P[i]); |
| (*__gmp_free_func) (P, 3*(k+2)*sizeof(mpz_t)); |
| (*__gmp_free_func) (mult, 2*(k+2)*sizeof(mp_prec_t)); |
| |
| if (shift_x > 0) |
| { |
| MPFR_BLOCK (flags, { |
| for (loop = 0; loop < shift_x - 1; loop++) |
| mpfr_sqr (tmp, tmp, GMP_RNDD); |
| mpfr_sqr (t, tmp, GMP_RNDD); |
| } ); |
| |
| if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags))) |
| { |
| /* tmp <= exact result, so that it is a real overflow. */ |
| inexact = mpfr_overflow (y, rnd_mode, 1); |
| MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_OVERFLOW); |
| break; |
| } |
| |
| if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags))) |
| { |
| /* This may be a spurious underflow. So, let's scale |
| the result. */ |
| mpfr_mul_2ui (tmp, tmp, 1, GMP_RNDD); /* no overflow, exact */ |
| mpfr_sqr (t, tmp, GMP_RNDD); |
| if (MPFR_IS_ZERO (t)) |
| { |
| /* approximate result < 2^(emin - 3), thus |
| exact result < 2^(emin - 2). */ |
| inexact = mpfr_underflow (y, (rnd_mode == GMP_RNDN) ? |
| GMP_RNDZ : rnd_mode, 1); |
| MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW); |
| break; |
| } |
| scaled = 1; |
| } |
| } |
| |
| if (mpfr_can_round (shift_x > 0 ? t : tmp, realprec, GMP_RNDD, GMP_RNDZ, |
| MPFR_PREC(y) + (rnd_mode == GMP_RNDN))) |
| { |
| inexact = mpfr_set (y, shift_x > 0 ? t : tmp, rnd_mode); |
| if (MPFR_UNLIKELY (scaled && MPFR_IS_PURE_FP (y))) |
| { |
| int inex2; |
| mp_exp_t ey; |
| |
| /* The result has been scaled and needs to be corrected. */ |
| ey = MPFR_GET_EXP (y); |
| inex2 = mpfr_mul_2si (y, y, -2, rnd_mode); |
| if (inex2) /* underflow */ |
| { |
| if (rnd_mode == GMP_RNDN && inexact < 0 && |
| MPFR_IS_ZERO (y) && ey == __gmpfr_emin + 1) |
| { |
| /* Double rounding case: in GMP_RNDN, the scaled |
| result has been rounded downward to 2^emin. |
| As the exact result is > 2^(emin - 2), correct |
| rounding must be done upward. */ |
| /* TODO: make sure in coverage tests that this line |
| is reached. */ |
| inexact = mpfr_underflow (y, GMP_RNDU, 1); |
| } |
| else |
| { |
| /* No double rounding. */ |
| inexact = inex2; |
| } |
| MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW); |
| } |
| } |
| break; |
| } |
| |
| MPFR_ZIV_NEXT (ziv_loop, realprec); |
| Prec = realprec + shift + 2 + shift_x; |
| mpfr_set_prec (t, Prec); |
| mpfr_set_prec (tmp, Prec); |
| } |
| MPFR_ZIV_FREE (ziv_loop); |
| |
| mpz_clear (uk); |
| mpfr_clear (tmp); |
| mpfr_clear (t); |
| mpfr_clear (x_copy); |
| MPFR_SAVE_EXPO_FREE (expo); |
| return inexact; |
| } |