/* Copyright 1996 Acorn Computers Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* math.c: ANSI draft (X3J11 May 86) library code, section D.5 */
/* Copyright (C) Codemist Ltd, 1988 */

/* version 0.04b */
/* Nov 87: fix bug in ibm frexp(-ve arg).                                   */

/*
 * This version of the code takes the view that whenever there is an
 * error a NaN should be handed back (as well as errno getting set). The
 * value HUGE_VAL is used, which is not actually a NaN but which will
 * often lead to exponent overflow pretty soon if it is used.  ACN is
 * unclear if this is sensible, and has had a program fall over when
 * atan2(0.0, 0.0) handed back HUGE_VAL rather than some less vicious
 * value (e.g. 0.0).  He can imagine people who expect pow(0.0, 0.0) to
 * be 1.0 (or maybe 0.0, but certainly not HUGE_VAL), and who expect
 * sin(x) to be <= 1.0 in absolute value regardless of anything.  Thus
 * the current state is OK if we are being strict, but mey be unfriendly
 * in some cases?  Thoughts and comments, anybody?
 */

#include "hostsys.h"
#include <limits.h>
#include <float.h>
#include <errno.h>

/* This file contains code for most of the math routines from <math.h>      */

/* On the ARM some of these routines are implemented as floating point      */
/* opcodes and as such appear in startup.s                                  */

#ifndef NO_FLOATING_POINT

#ifndef HOST_HAS_TRIG
#error HOST_HAS_TRIG assumed - software sin et al removed
#endif

#include <math.h>                          /* for forward references */
#include <fenv.h>

#pragma STDC FENV_ACCESS ON

#ifdef __CC_NORCROFT
/* NAN definitions to match the FPA support code */
#define NAN_XRem0       0f_7fc0000a
#define NAN_SqrtNeg     0f_7fc0000b
#define NAN_SinCosRange 0f_7fc00010
#define NAN_TanRange    0f_7fc00012
#define NAN_AsnAcsRange 0f_7fc00015
#define NAN_LgnLogNeg   0f_7fc00018
#define NAN_NegPowX     0f_7fc00019
#define NAN_0PowNonpos  0f_7fc0001a
#else
#define NAN_XRem0       NAN
#define NAN_SqrtNeg     NAN
#define NAN_SinCosRange NAN
#define NAN_TanRange    NAN
#define NAN_AsnAcsRange NAN
#define NAN_LgnLogNeg   NAN
#define NAN_NegPowX     NAN
#define NAN_0PowNonpos  NAN
#endif

/* On the ARM, this has moved into the library's static data area */
/* so that it still works with the Shared C Library module.       */
/* const double __huge_val = 1.79769313486231571e+308; */

/* New versions with Annex F behaviour; exported for complex.c */
double _new_cosh(double x);
double _new_sinh(double x);
double _new_tanh(double x);

double frexp(double d, int *lvn)
{
/* This version works even if d starts off as an unnormalized number in  */
/* the IEEE sense. But in that special case it will be mighty slow!      */
/* By that we mean at most 52 iterations for the smallest number.        */
    fp_number d1;
    int n;
    if (d==0.0)
    {   *lvn = 0;
        return d;
    }
    d1.d = d;
    if ((n = d1.i.x - 0x3fe) == -0x3fe)
    {   int flag;
/* Here d1 has zero in its exponent field - this means that the mantissa */
/* is un-normalized. I have to shift it left (at least one bit) until a  */
/* suitable nonzero bit appears to go in the implicit-bit place in the   */
/* fractional result. For each bit shifted I need to adjust the final    */
/* exponent that will be returned.                                       */
/* I have already tested to see if d was zero so the following loop MUST */
/* terminate.                                                            */
        n++;
        do
        {   flag = d1.i.mhi & 0x00080000;
            d1.i.mhi = (d1.i.mhi << 1) | (d1.i.mlo >> 31);
            d1.i.mlo = d1.i.mlo << 1;
            n--;
        } while (flag==0);
    }
    else if (n == 0x401)
    {
/* Here d1 has 0x7ff in its exponent field - it's a NaN or infinity      */
        *lvn = (d1.i.mhi || d1.i.mlo) ? FP_ILOGBNAN : INT_MAX;
        return d1.d;
    }
    *lvn = n;
    d1.i.x = 0x3fe;
    return d1.d;
}

float frexpf(float s, int *lvn)
{
    fp_number_single s1;
    int n;
    if (s==0.0F)
    {   *lvn = 0;
        return s;
    }
    s1.s = s;
    if ((n = s1.i.x - 0x7e) == -0x7e)
    {   int flag;
        n++;
        do
        {   flag = s1.i.m & 0x00400000;
            s1.i.m = s1.i.m << 1;
            n--;
        } while (flag==0);
    }
    else if (n == 0x81)
    {
/* Here d1 has 0xff in its exponent field - it's a NaN or infinity       */
        *lvn = (s1.i.m) ? FP_ILOGBNAN : INT_MAX;
        return s1.s;
    }
    *lvn = n;
    s1.i.x = 0x7e;
    return s1.s;
}

double scalbln(double d, long int n)
{
    fp_number x;
    long int exponent;
    if (n == 0) return d;
    x.d = d;
    exponent = x.i.x;
    if (exponent == 0x7ff) return d; /* NaN or infinite */
    if (exponent == 0) /* starting subnormal or zero */
    {   int flag;
        if (x.i.mhi == 0 && x.i.mlo == 0) return d;
        exponent++;
        do
        {   flag = x.i.mhi & 0x00080000;
            x.i.mhi = (x.i.mhi << 1) | (x.i.mlo >> 31);
            x.i.mlo = x.i.mlo << 1;
            exponent--;
        } while (flag==0);
    }
    if (n > 0x1000 || exponent + n >= 0x7ff) /* overflow */
    {   feraiseexcept(FE_OVERFLOW|FE_INEXACT);
        return x.i.s ? -INFINITY : INFINITY;
    }
    if (n < -0x1000 || exponent + n <= -53) /* total underflow */
    {   feraiseexcept(FE_UNDERFLOW|FE_INEXACT);
        x.i.x = x.i.mhi = x.i.mlo = 0;
        return x.d;
    }
    if (exponent + n <= 0) /* subnormal result */
    {   unsigned round = 0, hi;
        n = 1 - (exponent + n);
        hi = 0x00100000 | x.i.mhi;

        if (n >= 32)
        {   round = x.i.mlo;
            x.i.mlo = hi;
            hi = 0;
            n -= 32;
        }
        if (n > 0)
        {   round = (x.i.mlo << (32-n)) | (round != 0);
            x.i.mlo = (hi << (32-n)) | (x.i.mlo >> n);
            hi = hi >> n;
        }
        x.i.mhi = hi;
        x.i.x = 0;
        /* perform round-to-nearest; to even in tie cases */
        if (round > 0x80000000 ||
            (round == 0x80000000 && (x.i.mlo & 1)))
            if (++x.i.mlo == 0)
                if (++x.i.mhi == 0)
                    x.i.x = 1;

        if (round != 0) feraiseexcept(FE_UNDERFLOW|FE_INEXACT);

        return x.d;
    }
    /* normal result */
    x.i.x = (int) (exponent + n);
    return x.d;
}

float scalblnf(float s, long int n)
{
    fp_number_single x;
    long int exponent;
    if (n == 0) return s;
    x.s = s;
    exponent = x.i.x;
    if (exponent == 0xff) return s; /* NaN or infinite */
    if (exponent == 0) /* starting subnormal or zero */
    {   int flag;
        if (x.i.m == 0) return s;
        exponent++;
        do
        {   flag = x.i.m & 0x00400000;
            x.i.m = x.i.m << 1;
            exponent--;
        } while (flag==0);
    }
    if (n > 0x1000 || exponent + n >= 0xff) /* overflow */
    {   feraiseexcept(FE_OVERFLOW|FE_INEXACT);
        return x.i.s ? -INFINITY : INFINITY;
    }
    if (n < -0x1000 || exponent + n <= -24) /* total underflow */
    {   feraiseexcept(FE_UNDERFLOW|FE_INEXACT);
        x.i.x = x.i.m = 0;
        return x.s;
    }
    if (exponent + n <= 0) /* subnormal result */
    {   unsigned round, m;
        n = 1 - (exponent + n);
        m = 0x00800000 | x.i.m;

        round = m << (32-n);
        x.i.m = m >> n;
        x.i.x = 0;
        /* perform round-to-nearest; to even in tie cases */
        if (round > 0x80000000 ||
            (round == 0x80000000 && (x.i.m & 1)))
            if (++x.i.m == 0)
                  x.i.x = 1;

        if (round != 0) feraiseexcept(FE_UNDERFLOW|FE_INEXACT);

        return x.s;
    }
    /* normal result */
    x.i.x = (int) (exponent + n);
    return x.s;
}

double scalbn(double x, int n)
{
    return scalbln(x, n);
}

float scalbnf(float x, int n)
{
    return scalblnf(x, n);
}

double ldexp(double x, int n)
{
    fenv_t env;
    int exc;
    feholdexcept(&env);
    x = scalbn(x, n);
    exc = fetestexcept(FE_OVERFLOW|FE_UNDERFLOW);
    if (exc)
    {   errno = ERANGE;
        if (exc & FE_OVERFLOW)
        {   feclearexcept(FE_OVERFLOW);
            x = copysign(HUGE_VAL, x);
        }
    }
    feupdateenv(&env);
    return x;
}

float ldexpf(float x, int n)
{
    return scalbnf(x, n);
}

int ilogb(double d)
{
    fp_number x;
    int exponent;

    x.d = d;
    exponent = x.i.x;
    if (exponent == 0)
    {   int flag;
        if (x.i.mhi == 0 && x.i.mlo == 0) return FP_ILOGB0;
        exponent = 1;
        do
        {   flag = x.i.mhi & 0x00080000;
            x.i.mhi = (x.i.mhi << 1) | (x.i.mlo >> 31);
            x.i.mlo = x.i.mlo << 1;
            exponent--;
        } while (flag==0);
    }
    else if (exponent == 0x7ff)
    {   if (x.i.mhi || x.i.mlo)
            return FP_ILOGBNAN;
        else
            return INT_MAX;
    }
    return exponent - 0x3ff;
}

int ilogbf(float s)
{
    fp_number_single x;
    int exponent;

    x.s = s;
    exponent = x.i.x;
    if (exponent == 0)
    {   int flag;
        if (x.i.m == 0) return FP_ILOGB0;
        exponent = 1;
        do
        {   flag = x.i.m & 0x00400000;
            x.i.m = x.i.m << 1;
            exponent--;
        } while (flag==0);
    }
    else if (exponent == 0xff)
    {   if (x.i.m)
            return FP_ILOGBNAN;
        else
            return INT_MAX;
    }
    return exponent - 0x7f;
}

double logb(double d)
{
    int x = ilogb(d);

    switch (x)
    {
        case FP_ILOGB0:   feraiseexcept(FE_DIVBYZERO); return -INFINITY;
        case FP_ILOGBNAN: return d;
        case INT_MAX:     return INFINITY;
        default:          return x;
    }
}

float logbf(float s)
{
    int x = ilogbf(s);

    switch (x)
    {
        case FP_ILOGB0:   feraiseexcept(FE_DIVBYZERO); return -INFINITY;
        case FP_ILOGBNAN: return s;
        case INT_MAX:     return INFINITY;
        default:          return x;
    }
}

#define _exp_arg_limit 709.78271289338397

#define _pi_       3.14159265358979323846
#define _pi_3      1.04719755119659774615
#define _pi_2      1.57079632679489661923
#define _pi_4      0.78539816339744830962
#define _pi_6      0.52359877559829887038
#define _sqrt_half 0.70710678118654752440
#define _ln2hi_    0x1.62E42FEEp-1
#define _ln2lo_    0x1.A39EF35793C76p-33

static double fetidyexcept(const fenv_t *envp, double x)
{
    int exc = fetestexcept(FE_ALL_EXCEPT);
    if (exc & (FE_DIVBYZERO|FE_INVALID))
    {   if (exc & (FE_UNDERFLOW|FE_OVERFLOW|FE_INEXACT))
            feclearexcept(FE_UNDERFLOW|FE_OVERFLOW|FE_INEXACT);
    }
    else if (exc & FE_UNDERFLOW)
    {   /* Got to check for spurious underflow. We are allowed to raise underflow */
        /* whenever result is tiny (even if exact), so this is sufficient. */
        if (!isless(fabs(x), DBL_MIN))
            feclearexcept(FE_UNDERFLOW);
    }
    feupdateenv(envp);
    return x;
}

static float fetidyexceptf(const fenv_t *envp, float x)
{
    int exc = fetestexcept(FE_ALL_EXCEPT);
    if (exc & (FE_DIVBYZERO|FE_INVALID))
    {   if (exc & (FE_UNDERFLOW|FE_OVERFLOW|FE_INEXACT))
            feclearexcept(FE_UNDERFLOW|FE_OVERFLOW|FE_INEXACT);
    }
    else if (exc & FE_UNDERFLOW)
    {   /* Got to check for spurious underflow. We are allowed to raise underflow */
        /* whenever result is tiny (even if exact), so this is sufficient. */
        if (!isless(fabsf(x), FLT_MIN))
            feclearexcept(FE_UNDERFLOW);
    }
    feupdateenv(envp);
    return x;
}

double atan2(double y, double x)
{
    if (isunordered(y, x)) return y+x;

    if (y==0.0 || (isinf(x) && isfinite(y)))
    {   if (x==y) /* ie x==y==0 */
        {   errno = EDOM;
            return -HUGE_VAL;   // for backwards compatibility.
        }
        return signbit(x) ? copysign(_pi_, y) : y;
    }

    if (isinf(x)) /* y must also be infinite, from above */
        return copysign(x > 0 ? _pi_4 : 3*_pi_4, y);

    if (fabs(x) < fabs(y)) /* handles infinite y, finite x */
    {   if (fabs(x / y)<1.0e-20)
        {   if (y<0.0) return - _pi_2;
            else return _pi_2;
        }
    }
    y = atan(y / x);
    if (x<0.0)
    {   if (y>0.0) y -= _pi_;
        else y += _pi_;
    }
    return y;
}

/* atan2f supplied in mathasm; follows Annex F */

/* Use the macros, which expand to inline ABS instructions */
double (fabs)(double x) { return fabs(x); }
float (fabsf)(float x) { return fabsf(x); }

double sinh(double x)
{
    int sign;
    double y;
    if (x==0) return x;
    if (isless(x,0.0)) y = -x, sign = 1; else y = x, sign = 0;
    if (isgreater(y,1.0))
    {
/* _sinh_lnv is REQUIRED to read in as a number with the lower part of   */
/* its floating point representation zero.                               */
#define    _sinh_lnv     0.69316101074218750000          /* ln(v)        */
#define    _sinh_vm2     0.24999308500451499336          /* 1/v^2        */
#define    _sinh_v2m1    0.13830277879601902638e-4       /* (v/2) - 1    */
        double w = y - _sinh_lnv, z, r;
        if (w>_exp_arg_limit)
        {   if (isinf(x)) return x;
            errno = ERANGE;
            if (sign) return -HUGE_VAL;
            else return HUGE_VAL;
        }
        z = exp(w);   /* should not overflow!                            */
        if (z < 1.0e10) z = z - _sinh_vm2/z;
        r = z + _sinh_v2m1 * z;
        if (sign) return -r;
        else return r;
    }
    else if (!isgreater(y,1.0e-10)) return x;
    else
    {
#define _sinh_p0    -0.35181283430177117881e6
#define _sinh_p1    -0.11563521196851768270e5
#define _sinh_p2    -0.16375798202630751372e3
#define _sinh_p3    -0.78966127417357099479e0
#define _sinh_q0    -0.21108770058106271242e7
#define _sinh_q1     0.36162723109421836460e5
#define _sinh_q2    -0.27773523119650701667e3
#define _sinh_q3     1.0
        double g = x*x;
        double r;
        /* Use a (minimax) rational approximation. See Cody & Waite.     */
        r = ((((_sinh_p3*g + _sinh_p2)*g + _sinh_p1)*g + _sinh_p0)*g) /
             (((g + _sinh_q2)*g + _sinh_q1)*g + _sinh_q0);
        return x + x*r;
    }
}

double _new_sinh(double x)
{
    int sign;
    double y;
    if (x==0) return x;
    if (isless(x,0.0)) y = -x, sign = 1; else y = x, sign = 0;
    if (isgreater(y,1.0))
    {   fenv_t env;
        feholdexcept(&env);
        double w = y - _sinh_lnv, z, r;
        z = __d_exp(w);  /* may overflow */
        if (z < 1.0e10) z = z - _sinh_vm2/z;
        r = z + _sinh_v2m1 * z;
        return fetidyexcept(&env, sign ? -r : +r);
    }
    else if (islessequal(y,1.0e-10))
    {   feraiseexcept(FE_INEXACT);
        return x;
    }
    else /* NaN case comes here */
    {
        double g = x*x;
        double r;
        /* I assume this will always raise inexact (except for NaN). */
        /* Use a (minimax) rational approximation. See Cody & Waite.     */
        r = ((((_sinh_p3*g + _sinh_p2)*g + _sinh_p1)*g + _sinh_p0)*g) /
             (((g + _sinh_q2)*g + _sinh_q1)*g + _sinh_q0);
        return x + x*r;
    }
}

double cosh(double x)
{
    x = fabs(x);
    if (isgreater(x,1.0))
    {   x = x - _sinh_lnv;
        if (x>_exp_arg_limit)
        {   if (isinf(x)) return x;
            errno = ERANGE;
            return HUGE_VAL;
        }
        x = exp(x);   /* the range check above ensures that this does    */
                      /* not overflow.                                   */
        if (x < 1.0e10) x = x + _sinh_vm2/x;
        /* This very final line might JUST overflow even though the call */
        /* to exp is safe and even though _exp_arg_limit is conservative */
        return x + _sinh_v2m1 * x;
    }
/* This second part is satisfactory, even though it is simple!           */
    x = exp(x);
    return 0.5*(x + 1/x);
}

double _new_cosh(double x)
{
    fenv_t env;
    feholdexcept(&env);
    x = fabs(x);
    if (isgreater(x,1.0))
    {   x = __d_exp(x - _sinh_lnv);
        if (x < 1.0e10) x = x + _sinh_vm2/x;
        x = x + _sinh_v2m1 * x;
    }
    else
    {
    /* This second part is satisfactory, even though it is simple!           */
        x = __d_exp(x);
        x = 0.5*(x + 1/x);
    }
    return fetidyexcept(&env, x);
}

double tanh(double x)
{
/* The first two exits avoid premature overflow as well as needless use  */
/* of the exp() function.                                                */
    int sign;
    if (isgreater(x,27.0)) return 1.0;  /* here exp(2x) dominates 1.0    */
    else if (isless(x,-27.0)) return -1.0;
    if (isless(x,0.0)) x = -x, sign = 1;
    else sign = 0;
    if (isgreater(x,0.549306144334054846)) /* ln(3)/2 is crossover point */
    {   x = exp(2.0*x);
        x = 1.0 - 2.0/(x + 1.0);
        if (sign) return -x;
        else return x;
    }
#define _tanh_p0    -0.16134119023996228053e4
#define _tanh_p1    -0.99225929672236083313e2
#define _tanh_p2    -0.96437492777225469787e0
#define _tanh_q0     0.48402357071988688686e4
#define _tanh_q1     0.22337720718962312926e4
#define _tanh_q2     0.11274474380534949335e3
#define _tanh_q3     1.0
    if (isgreater(x,1.0e-10))
    {   double y = x*x;
        /* minimax rational approximation                                */
        y = (((_tanh_p2*y + _tanh_p1)*y + _tanh_p0)*y) /
             (((y + _tanh_q2)*y + _tanh_q1)*y + _tanh_q0);
        x = x + x*y;
    }
    if (sign) return -x;
    else return x;
}

double _new_tanh(double x)
{
/* The first two exits avoid premature overflow as well as needless use  */
/* of the exp() function.                                                */
    int sign;
    if (isgreater(x,27.0))
    {   if (!isinf(x)) feraiseexcept(FE_INEXACT);
        return 1.0;  /* here exp(2x) dominates 1.0    */
    }
    else if (isless(x,-27.0))
    {   if (!isinf(x)) feraiseexcept(FE_INEXACT);
        return -1.0;
    }
    if (x == 0.0) return x;
    if (isless(x,0.0)) x = -x, sign = 1;
    else sign = 0;
    if (isgreater(x,0.549306144334054846)) /* ln(3)/2 is crossover point */
    {   x = exp(2.0*x);
        x = 1.0 - 2.0/(x + 1.0);
        if (sign) return -x;
        else return x;
    }
    if (!islessequal(x,1.0e-10))
    {   double y = x*x; /* NaN case comes here */
        /* I assume this will raise inexact (except for NaN) */
        /* minimax rational approximation                                */
        y = (((_tanh_p2*y + _tanh_p1)*y + _tanh_p0)*y) /
             (((y + _tanh_q2)*y + _tanh_q1)*y + _tanh_q0);
        x = x + x*y;
    }
    else
        feraiseexcept(FE_INEXACT);
    if (sign) return -x;
    else return x;
}

/* Simple forms for now */
float coshf(float x)
{
    fenv_t env;
    feholdexcept(&env);
    return fetidyexceptf(&env, (float)cosh(x));
}

float sinhf(float x)
{
    fenv_t env;
    feholdexcept(&env);
    return fetidyexceptf(&env, (float)sinh(x));
}

float tanhf(float x)
{
    fenv_t env;
    feholdexcept(&env);
    return fetidyexceptf(&env, (float)tanh(x));
}

double asinh(double x)
{
    if (x == 0) return x; // asinh(�0) returns �0

    double y = fabs(x), s; // rounding mode UP/DOWN danger here

    if (isless(y, 0x1.0p-32))
    {   feraiseexcept(FE_INEXACT);
        return x; // what about rounding mode
    }
    if (isgreater(y, 0x1.0p64))
    {   s = log1p(y) + _ln2lo_;
        return copysign(s + _ln2hi_, x);
    }
    s = 1.0/y;
    return copysign(log1p(y + y/(s+hypot(s,1.0))), x);
}

double acosh(double x)
{
    double t;
    if (isless(x, 1.0))
    {   feraiseexcept(FE_INVALID);
        return NAN_LgnLogNeg;
    }
    if (isgreater(x, 0x1.0p64))
    {   t = log1p(x) + _ln2lo_;
        return t + _ln2hi_;
    }
    t = sqrt(x - 1.0);
    #ifdef FE_DOWNWARD
    t = fabs(t); // otherwise acosh(1)=-0 in FE_DOWNWARD
    #endif
    return log1p(t * (t + sqrt(x + 1.0)));
}

double atanh(double x)
{
    double z = copysign(0.5, x);
    x = fabs(x);
    if (isgreater(x, 1.0))
        x = -1.0; // so log1p traps, but avoiding INX in 1.0-x
    else
    #ifdef FE_DOWNWARD
        x = x / fabs(1.0 - x); // otherwise atanh(1)=NaN in FE_DOWNWARD
    #else
        x = x / (1.0 - x);
    #endif
    return z * log1p(x + x);
}

/* The cast back to float for these 3 can only raise inexact */
float asinhf(float x)
{
    return (float) asinh(x);
}

float acoshf(float x)
{
    return (float) acosh(x);
}

float atanhf(float x)
{
    return (float) atanh(x);
}

/* expm1 implementation from Tang's 1992 paper */
#define _expm1_te    0x1p-54
#define _expm1_tp    0x1.C4474E1726455p10  // 2610 log 2
#define _expm1_tm   -0x1.2B708872320E1p5   //  -54 log 2
#define _expm1_t1   -0x1.269621134DB93p-2  // log (1 - 1/4) rounded down
#define _expm1_t2    0x1.C8FF7C79A9A22p-3  // log (1 + 1/4) rounded up
#define _expm1_Inv_L 0x1.71547652B82FEp5   // 32 / log 2
#define _expm1_L1    0x1.62E42FEF00000p-6  // log 2 / 32 (ms bits)
#define _expm1_L2    0x1.473DE6AF278EDp-39 // log 2 / 32 (extra bits)

static const double _expm1_s[32][2] = // 2^(j/32) with extended precision
{
    0x1.0000000000000p0, 0,
    0x1.059B0D3158540p0, 0x1.A1D73E2A475B4p-47,
    0x1.0B5586CF98900p0, 0x1.EC5317256E308p-49,
    0x1.11301D0125B40p0, 0x1.0A4EBBF1AED93p-48,
    0x1.172B83C7D5140p0, 0x1.D6E6FBE462876p-47,
    0x1.1D4873168B980p0, 0x1.53C02DC0144C8p-47,
    0x1.2387A6E756200p0, 0x1.C3360FD6D8E0Bp-47,
    0x1.29E9DF51FDEC0p0, 0x1.09612E8AFAD12p-47,
    0x1.306FE0A31B700p0, 0x1.52DE8D5A46306p-48,
    0x1.371A7373AA9C0p0, 0x1.54E28AA05E8A9p-49,
    0x1.3DEA64C123400p0, 0x1.11ADA0911F09Fp-47,
    0x1.44E0860618900p0, 0x1.68189B7A04EF8p-47,
    0x1.4BFDAD5362A00p0, 0x1.38EA1CBD7F621p-47,
    0x1.5342B569D4F80p0, 0x1.DF0A83C49D86Ap-52,
    0x1.5AB07DD485400p0, 0x1.4AC64980A8C8Fp-47,
    0x1.6247EB03A5580p0, 0x1.2C7C3E81BF4B7p-50,
    0x1.6A09E667F3BC0p0, 0x1.921165F626CDDp-49,
    0x1.71F75E8EC5F40p0, 0x1.9EE91B8797785p-47,
    0x1.7A11473EB0180p0, 0x1.B5F54408FDB37p-50,
    0x1.82589994CCE00p0, 0x1.28ACF88AFAB35p-48,
    0x1.8ACE5422AA0C0p0, 0x1.B5BA7C55A192Dp-48,
    0x1.93737B0CDC5C0p0, 0x1.27A280E1F92A0p-47,
    0x1.9C49182A3F080p0, 0x1.01C7C46B071F3p-48,
    0x1.A5503B23E2540p0, 0x1.C8B424491CAF8p-48,
    0x1.AE89F995AD380p0, 0x1.6AF439A68BB99p-47,
    0x1.B7F76F2FB5E40p0, 0x1.BAA9EC206AD4Fp-50,
    0x1.C199BDD855280p0, 0x1.C2220CB12A092p-48,
    0x1.CB720DCEF9040p0, 0x1.48A81E5E8F4A5p-47,
    0x1.D5818DCFBA480p0, 0x1.C976816BAD9B8p-50,
    0x1.DFC97337B9B40p0, 0x1.EB968CAC39ED3p-48,
    0x1.EA4AFA2A490C0p0, 0x1.9858F73A18F5Ep-48,
    0x1.F50765B6E4540p0, 0x1.9D3E12DD8A18Bp-54
};

static double poly(double x, int n, const double A[static n])
{
    double r = A[n-1] * x;
    for (int i = n-2; i >= 0; i--)
        r = (r + A[i]) * x;
    return r;
}

double expm1(double x)
{
    if (isless(fabs(x), _expm1_te))
    {   if (x == 0.0) return x;
        return (x*0x1p128 + fabs(x)) * 0x1p-128;
    }
    if (!islessequal(x, _expm1_tp))
    {   /* NaN comes here */
        return __d_exp(x); /* let exp() do all the hard work */
    }
    if (x < _expm1_tm)
    {   if (isfinite(x)) return -1.0 + 0x1p-128;
        return -1.0;
    }
    if (x <= _expm1_t1 || x >= _expm1_t2)
    {   /* procedure 1 */
        int n = (int) lrint(x * _expm1_Inv_L);
        int n2 = n & 31;
        int n1 = n - n2;
        double r1 = x - (n * _expm1_L1);
        double r2 = - (n * _expm1_L2);
        int m = n1 / 32;
        int j = n2;
        double r = r1 + r2;
        static const double A[5] = { 0x1.0000000000000p-1,
                                     0x1.5555555548F7Cp-3,
                                     0x1.5555555545D4Ep-5,
                                     0x1.11115B7AA905Ep-7,
                                     0x1.6C1728D739765p-10 };
        double q = poly(r, 5, A) * r;
        double p = r1 + (r2 + q);
        double s = _expm1_s[j][0] + _expm1_s[j][1];
        if (m >= 53)
        {   x = _expm1_s[j][1];
            if (m < 2*53) x -= scalbn(1, -m);
            return scalbn(_expm1_s[j][0] + (s * p + x), m);
        }
        else if (m <= -8)
            return scalbn(_expm1_s[j][0] + (s * p + _expm1_s[j][1]), m) - 1;
        else
            return scalbn((_expm1_s[j][0] - scalbn(1, -m)) +
                          (_expm1_s[j][0] * p + _expm1_s[j][1] * (1 + p)), m);
    }
    else
    {   /* procedure 2 */
        double u = (float) x;
        double v = x - u;
        double y = u * u * 0.5;
        double z = v * (x + u) * 0.5;
        static const double A[9] = { 0x1.5555555555549p-3,
                                     0x1.55555555554B6p-5,
                                     0x1.111111111A9F3p-7,
                                     0x1.6C16C16CE14C6p-10,
                                     0x1.A01A01159DD2Dp-13,
                                     0x1.A019F635825C4p-16,
                                     0x1.71E14BFE3DB59p-19,
                                     0x1.28295484734EAp-22,
                                     0x1.A2836AA646B96p-26 };
        double q = poly(x, 9, A) * x * x;
        if (fabs(y) >= 0x1p-7)
            return (u + y) + (q + (v + z));
        else
            return x + (y + (q + z));
    }
}

float expm1f(float x)
{
    fenv_t env;
    feholdexcept(&env);
    return fetidyexceptf(&env, (float)expm1(x));
}

double fmod(double x, double y)
{
/* floating point remainder of (x/y) for integral quotient. Remainder    */
/* has same sign as x.                                                   */
    double r;
    if (isinf(x) || y == 0.0)
    { errno = EDOM;
      return -HUGE_VAL;
    }
    r = remainder(fabs(x), y = fabs(y));
    if (signbit(r)) r += y;
    return copysign(r, x);
}

float fmodf(float x, float y)
{
    float r;
    r = remainderf(fabsf(x), y = fabsf(y));
    if (signbit(r)) r += y;
    return copysignf(r, x);
}

double (ceil)(double x)                { return ceil(x); }
float (ceilf)(float x)                 { return ceilf(x); }
double (floor)(double x)               { return floor(x); }
float (floorf)(float x)                { return floorf(x); }
double (rint)(double x)                { return rint(x); }
float (rintf)(float x)                 { return rintf(x); }
long int (lrint)(double x)             { return lrint(x); }
long int (lrintf)(float x)             { return lrintf(x); }
double (trunc)(double x)               { return trunc(x); }
float (truncf)(float x)                { return truncf(x); }

float (acosf)(float x)                 { return acosf(x); }
float (asinf)(float x)                 { return asinf(x); }
double (atan)(double x)                { return atan(x); }
float (atanf)(float x)                 { return atanf(x); }
float (sinf)(float x)                  { return sinf(x); }
float (cosf)(float x)                  { return cosf(x); }
float (tanf)(float x)                  { return tanf(x); }
float (expf)(float x)                  { return expf(x); }
float (logf)(float x)                  { return logf(x); }
float (log10f)(float x)                { return log10f(x); }
float (sqrtf)(float x)                 { return sqrtf(x); }

double modf(double value, double *iptr)
{
/* splits value into integral part & fraction (both same sign)           */
    fp_number x;
    int exponent, mask;
    if (value == 0.0)
    {   *iptr = value;
        return value;
    }
    x.d = value;
    if ((exponent = x.i.x - 0x3ff) < 0)
    {   *iptr = copysign(0.0, value);
        return value;
    }
    else if (exponent >= 52)
    {   *iptr = value;
        return copysign(0.0, value);
    }
    if (exponent >= 20)
    {   mask = ((unsigned) 0xffffffff) >> (exponent - 20);
        x.i.mlo &= ~mask;
    }
    else
    {   mask = 0xfffff >> exponent;
        x.i.mhi &= ~mask;
        x.i.mlo = 0;
    }
    *iptr = x.d;
    return value - x.d;
}

float modff(float value, float *iptr)
{
/* splits value into integral part & fraction (both same sign)           */
    fp_number_single x;
    int exponent, mask;
    if (value == 0.0F)
    {   *iptr = value;
        return value;
    }
    x.s = value;
    if ((exponent = x.i.x - 0xff) < 0)
    {   *iptr = copysignf(0.0F, value);
        return value;
    }
    else if (exponent >= 23)
    {   *iptr = value;
        return copysignf(0.0F, value);
    }
    mask = 0x7fffff >> exponent;
    x.i.m &= ~mask;
    *iptr = x.s;
    return value - x.s;
}

double nan(const char *s)
{
    return (double) NAN;
}

float nanf(const char *s)
{
    return NAN;
}

double fdim(double x, double y)
{
    if (islessequal(x, y))
       return 0.0;
    else
       return x - y; /* Will return NaN for NaN input */
}

float fdimf(float x, float y)
{
    if (islessequal(x, y))
       return 0.0F;
    else
       return x - y; /* Will return NaN for NaN input */
}

#define _sqrt2pi   2.50662827463100050242

/*
 * Gamma functions computed using Lanczos' approximation.
 * Double version uses coefficients computed as per Spouge (1994)
 * to achieve (theoretically) < 1 ulp error.
 * Float version uses original Lanczos coefficients.
 *
 * Lanczos' approximation:
 *
 * G(x+1) = (x+a)^(x+.5)* e^-(x+a) * sqrt(2*pi) *
 *            [ c0 + c1/(x+1) + c2/(x+3).. + cn/(x+n) ]    (x > 0)
 *
 *
 * Spouge's coefficients:
 *  c0 = 1
 *  c[k] = (-1)^(k-1) * e^(a-k) * (a-k)^(k-0.5)        ( 1 <= k < ceil(a) )
 *        ------------------------------------
 *                  sqrt(2*pi) * (k-1)!
 *
 * giving relative error < sqrt(a) * (2*pi)^-(a+0.5) / (a+x)
 *
 * Useful relations: gamma(x+1) = x*gamma(x)
 *                   gamma(1-x) = pi / (gamma(x)*sin(pi*x))
 *                   gamma(n+1) = n!
 */

static const double _lgamma_c[18] =   // actually c[k]*sqrt(2*pi)
{
     166599025.853949811570,
    -981939810.195007931170,
     2548964102.46316408700,
    -3836248618.43839895348,
     3709080184.61731236844,
    -2412708472.49486138749,
     1075449464.75190680642,
    -328534715.011179056348,
     67752870.4715251633277,
    -9145761.20044444915581,
     768402.637723269577278,
    -37175.9448951564986832,
     917.944248521710658895,
    -9.51510944794615044564,
     0.0294177178100457006509,
    -1.37185031730621246722e-5,
     1.72316912091954830013e-10,
    -2.50065513100139951901e-20
};

static inline double _gamma_sum(double x)
{
    double r = _sqrt2pi;
    for (int i = 1; i <= 18; i++)
        r += _lgamma_c[i-1] / (x + i);
    return r;
}

static const double _lgammaf_c[7] =
{
     1.000000000190015,
     76.18009172947146,
    -86.50532032941677,
     24.01409824083091,
    -1.231739572450155,
     1.208650973866179e-3,
    -5.395239384953e-6
};

static inline double _gammaf_sum(double x)
{
    double r = _lgammaf_c[0];
    for (int i = 1; i <= 6; i++)
        r += _lgammaf_c[i] / (x + i);
    return r;
}

double lgamma(double x)
{
    if (isinf(x)) return INFINITY;
    if (isnan(x)) return x;

    if (x < 1)
    {
        if (floor(x) == x)
        {   feraiseexcept(FE_DIVBYZERO);
            return INFINITY;
        }

        return __d_log(fabs((1-x)*_pi_/__d_sin(_pi_*x)))-lgamma(2-x);
    }

    if (x == 1 || x == 2) return +0;

    return (x+0.5)*__d_log(x+18.5) - (x+18.5) + __d_log(_gamma_sum(x) / x);
}

float lgammaf(float x)
{
    if (isinf(x)) return INFINITY;
    if (isnan(x)) return x;

    if (x < 1)
    {
        if (floorf(x) == x)
        {   feraiseexcept(FE_DIVBYZERO);
            return INFINITY;
        }

        return (float) __d_log(fabs((1.0-x)*_pi_/__d_sin(_pi_*x)))-lgammaf(2-x);
    }

    if (x == 1 || x == 2) return +0;

    return (float) ((x+0.5)*__d_log(x+5.5) - (x+5.5) + __d_log(_sqrt2pi * _gammaf_sum(x) / x));
}

double tgamma(double x)
{
    if (isinf(x))
    {   if (x > 0)
            return x;
        else
        {   feraiseexcept(FE_INVALID);
            return NAN;
        }
    }
    if (isnan(x)) return x;

    if (x == 0)
        return 1 / x; // �Inf, with Divide By Zero

    if (floor(x) == x)
    {
        if (x < 0)
        {   feraiseexcept(FE_INVALID);
            return NAN;
        }
        else if (x <= 171)
        {  double r = 1;
            for (int n = (int) x - 1; n > 1; n--)
                r *= n;
            return r;
        }
    }

    if (x < 1)
        return (1-x)*_pi_/(sin(_pi_*x)*tgamma(2-x));

    return __d_exp(lgamma(x));
}

float tgammaf(float x)
{
    if (isinf(x))
    {   if (x > 0)
            return x;
        else
        {   feraiseexcept(FE_INVALID);
            return NAN;
        }
    }
    if (isnan(x)) return x;

    if (x == 0)
        return 1 / x; // �Inf, with Divide By Zero

    if (floorf(x) == x)
    {
        if (x < 0)
        {   feraiseexcept(FE_INVALID);
            return NAN;
        }
        else if (x <= 35)
        {
            float r = 1;
            for (int n = (int) x - 1; n > 1; n--)
                r *= n;
            return r;
        }
    }

    if (x < 1)
        return (float) ((1.0-x)*_pi_/(sin(_pi_*x)*tgammaf(2-x)));

    return expf(lgammaf(x));
}

/* Error function algorithms derived from "Numerical recipes in C" */

#define _log_sqrt_pi 0.5723649429247000870717137 // == lgamma(0.5)

/* Series calculation for incomplete gamma function P(0.5,x); good for */
/* x <= 1.5. */
static double gser05(double x, double epsilon)
{
    double sum,del,ap;

    ap = 0.5;
    del = sum = 2;
    for (;;)
    {   ++ap;
        del *= x/ap;
        sum += del;
        if (fabs(del) < fabs(sum)*epsilon)
            return sum*exp(-x+0.5*log(x)-_log_sqrt_pi);
    }
}

/* Continued fraction calculation for incomplete gamma function */
/* 1 - P(0.5,x); good for x >= 1.5. */
static double gcf05(double x, double epsilon)
{
    double an,b,c,d,del,h;
    #define FPMIN 1e-300

    b = x+0.5;
    c = 1/FPMIN;
    d = 1/b;
    h = d;
    for (int i=1; ; i++)
    {   an = i*(0.5-i);
        b += 2;
        d = an*d+b;
        if (fabs(d) < FPMIN) d = FPMIN;
        c = b + an/c;
        if (fabs(c) < FPMIN) c = FPMIN;
        d = 1/d;
        del = d*c;
        h *= del;
        if (fabs(del-1) < epsilon) break;
    }
    return exp(-x+0.5*log(x)-_log_sqrt_pi)*h;
}

static double gammp05(double x, double epsilon)
{
    if (isunordered(x, 1.5))
        return x;
    else if (isless(x, 1.5))
        return gser05(x,epsilon);
    else
        return 1 - gcf05(x,epsilon);
}

static double gammq05(double x, double epsilon)
{
    if (isunordered(x, 1.5))
        return x;
    else if (isless(x, 1.5))
        return 1 - gser05(x,epsilon);
    else
        return gcf05(x,epsilon);
}

double erf(double x)
{
    if (x == 0) return x;
    if (isless(x, 0.0)) return -erf(-x);
    if (isgreater(x, 1e100)) return 1.0;

    return gammp05(x*x, 3*DBL_EPSILON);
}

double erfc(double x)
{
    if (isgreater(fabs(x), 1e100)) return isless(x, 0.0) ? 2 : 0;

    return isless(x, 0.0) ? 1+gammp05(x*x, 3*DBL_EPSILON)
                          : gammq05(x*x, 3*DBL_EPSILON);
}

float erff(float x)
{
    if (x == 0) return x;
    if (isless(x, 0.0)) return -erff(-x);
    if (isgreater(x, 1e15F)) return 1.0F;

    return (float) gammp05((double) x*x, 3.0*FLT_EPSILON);
}

#define _erfcf_c0  -1.26551223
#define _erfcf_c1   1.00002368
#define _erfcf_c2   0.37409196
#define _erfcf_c3   0.09678418
#define _erfcf_c4  -0.18628806
#define _erfcf_c5   0.27886807
#define _erfcf_c6  -1.13520398
#define _erfcf_c7   1.48851586
#define _erfcf_c8  -0.82215223
#define _erfcf_c9   0.17087277

float erfcf(float x)
{
    double t,r;

    if (isgreater(fabsf(x), 1e15F)) return isless(x, 0.0F) ? 2 : 0;

    t = 1/(1+0.5*fabsf(x));

    r = (((((((((_erfcf_c9) * t + _erfcf_c8) * t + _erfcf_c7) * t +
                 _erfcf_c6) * t + _erfcf_c5) * t + _erfcf_c4) * t +
                 _erfcf_c3) * t + _erfcf_c2) * t + _erfcf_c1) * t +
                 _erfcf_c0;

    r = t*exp(r - (double)x*x);
    return (float) (isless(x, 0.0F) ? 2-r : r);
}

/* Output quo is 0 for error cases, else has magnitude congruent */
/* modulo 2^31 to the magnitude of the integral quotient of x/y */
double remquo(double x, double y, int *quo)
{
    double r;
    long long xm, ym, rm;
    int i, xx, yx, sx;
    int sign, qsign;
    unsigned q=0;
    if (!islessgreater(x, 0.0) || !islessgreater(y, 0.0) ||
        isinf(x) || isinf(y))
        return *quo = 0, remainder(x, y);
    xx = ilogb(x);
    yx = ilogb(y);
    i = xx-yx+1;
    if (i < 0) return x;
    xm = (long long) scalbn(fabs(x), DBL_MANT_DIG-1-xx);
    ym = (long long) scalbn(fabs(y), DBL_MANT_DIG-1-yx);
    sign = sx = x < 0;
    qsign = sign ^ (y < 0);
    rm = xm;
    do
    {   if (ym < rm)
        {   sign = !sign;
            q = ~q;
            rm = ym+ym-rm;
        }
        i--, rm<<=1, q<<=1;
    } while (i >= 0);
    if (sign != sx) q = -q;
    q >>= 1;
    *quo = qsign ? -(int) q : (int) q;
    if (rm == 0) sign = sx, r = 0;
    else r = scalbn((double) rm, yx-1-DBL_MANT_DIG);
    return sign ? -r : +r;
}

float remquof(float x, float y, int *quo)
{
    float r;
    int xm, ym, rm;
    int i, xx, yx, sx;
    int sign, qsign;
    unsigned q=0;
    if (!islessgreater(x, 0.0F) || !islessgreater(y, 0.0F) ||
        isinf(x) || isinf(y))
        return *quo = 0, remainderf(x, y);
    xx = ilogbf(x);
    yx = ilogbf(y);
    i = xx-yx+1;
    if (i < 0) return x;
    xm = (int) scalbnf(fabsf(x), FLT_MANT_DIG-1-xx);
    ym = (int) scalbnf(fabsf(y), FLT_MANT_DIG-1-yx);
    sign = sx = x < 0;
    qsign = sign ^ (y < 0);
    rm = xm;
    do
    {   if (ym < rm)
        {   sign = !sign;
            q = ~q;
            rm = ym+ym-rm;
        }
        i--, rm<<=1, q<<=1;
    } while (i >= 0);
    if (sign != sx) q = -q;
    q >>= 1;
    *quo = qsign ? -(int) q : (int) q;
    if (rm == 0) sign = sx, r = 0;
    else r = scalbnf((float) rm, yx-1-FLT_MANT_DIG);
    return sign ? -r : +r;
}

#endif /* NO_FLOATING_POINT */

/* end of math.c */