JuliaMath · heltonmc · Jan 22, 2022 · Jan 18, 2022 · Jan 18, 2022 · Jan 18, 2022
diff --git a/src/Bessels.jl b/src/Bessels.jl
@@ -7,6 +7,8 @@ export besselj
 export bessely0
 export bessely1
 
+export besseli
+export besselix
 export besseli0
 export besseli0x
 export besseli1
@@ -32,7 +34,8 @@ include("Float128/bessely.jl")
 include("Float128/constants.jl")
 
 include("math_constants.jl")
-#include("parse.jl")
+include("U_polynomials.jl")
+include("recurrence.jl")
 #include("hankel.jl")
 
 end
diff --git a/src/U_polynomials.jl b/src/U_polynomials.jl
@@ -0,0 +1,82 @@
+function Uk_poly_Kn(p, v, p2, ::Type{Float32})
+    u0 = one(p)
+    u1 = 1 / 24 * evalpoly(p2, (3, -5))
+    u2 = 1 / 1152 * evalpoly(p2, (81, -462, 385))
+    u3 = 1 / 414720 * evalpoly(p2, (30375, -369603, 765765, -425425))
+    u4 = 1 / 39813120 * evalpoly(p2, (4465125, -94121676, 349922430, -446185740, 185910725))
+    return evalpoly(-p/v, (u0, u1, u2, u3, u4))
+end
+function Uk_poly_Kn(p, v, p2, ::Type{T}) where T <: Float64
+    u0 = one(T)
+    u1 = 1 / 24 * evalpoly(p2, (3, -5))
+    u2 = 1 / 1152 * evalpoly(p2, (81, -462, 385))
+    u3 = 1 / 414720 * evalpoly(p2, (30375, -369603, 765765, -425425))
+    u4 = 1 / 39813120 * evalpoly(p2, (4465125, -94121676, 349922430, -446185740, 185910725))
+    u5 = 1 / 6688604160 * evalpoly(p2, (1519035525, -49286948607, 284499769554, -614135872350, 566098157625, -188699385875))
+    u6 = 1 / 4815794995200 * evalpoly(p2, (2757049477875, -127577298354750, 1050760774457901, -3369032068261860,5104696716244125, -3685299006138750, 1023694168371875))
+   return evalpoly(-p/v, (u0, u1, u2, u3, u4, u5, u6))
+end
+function Uk_poly_In(p, v, p2, ::Type{T}) where T <: Float64
+    u0 = one(T)
+    u1 = -1 / 24 * evalpoly(p2, (3, -5))
+    u2 = 1 / 1152 * evalpoly(p2, (81, -462, 385))
+    u3 = -1 / 414720 * evalpoly(p2, (30375, -369603, 765765, -425425))
+    u4 = 1 / 39813120 * evalpoly(p2, (4465125, -94121676, 349922430, -446185740, 185910725))
+    u5 = -1 / 6688604160 * evalpoly(p2, (1519035525, -49286948607, 284499769554, -614135872350, 566098157625, -188699385875))
+    u6 = 1 / 4815794995200 * evalpoly(p2, (2757049477875, -127577298354750, 1050760774457901, -3369032068261860,5104696716244125, -3685299006138750, 1023694168371875))
+    return evalpoly(-p/v, (u0, u1, u2, u3, u4, u5, u6))
+end
+function Uk_poly_In(p, v, p2, ::Type{Float32})
+    u0 = one(p)
+    u1 = -1 / 24 * evalpoly(p2, (3, -5))
+    u2 = 1 / 1152 * evalpoly(p2, (81, -462, 385))
+    u3 = -1 / 414720 * evalpoly(p2, (30375, -369603, 765765, -425425))
+    u4 = 1 / 39813120 * evalpoly(p2, (4465125, -94121676, 349922430, -446185740, 185910725))
+    return evalpoly(-p/v, (u0, u1, u2, u3, u4))
+ end
+
+#=
+function Uk_poly_Kn(p, v, p2, ::Type{T}) where T <: BigFloat
+    u0 = one(T)
+    u1 = 1 / 24 * evalpoly(p2, (3, -5))
+    u2 = 1 / 1152 * evalpoly(p2, (81, -462, 385))
+    u3 = 1 / 414720 * evalpoly(p2, (30375, -369603, 765765, -425425))
+    u4 = 1 / 39813120 * evalpoly(p2, (4465125, -94121676, 349922430, -446185740, 185910725))
+    u5 = 1 / 6688604160 * evalpoly(p2, (1519035525, -49286948607, 284499769554, -614135872350, 566098157625, -188699385875))
+    u6 = 1 / 4815794995200 * evalpoly(p2, (2757049477875, -127577298354750, 1050760774457901, -3369032068261860,5104696716244125, -3685299006138750, 1023694168371875))
+    u7 = 1 / 115579079884800 * evalpoly(p2, (199689155040375, -12493049053044375, 138799253740521843, -613221795981706275, 1347119637570231525, -1570320948552481125, 931766432052080625,  -221849150488590625))
+    u8 = 1 / 22191183337881600 * evalpoly(p2, (134790179652253125, -10960565081605263000, 157768535329832893644, -914113758588905038248, 2711772922412520971550, -4513690624987320777000, 4272845805510421639500, -2152114239059719935000, 448357133137441653125))
+    u9 = 1 / 263631258054033408000 * evalpoly(p2, (6427469716717690265625, -659033454841709672064375, 11921080954211358275362500, -87432034049652400520788332, 334380732677827878090447630, -741743213039573443221773250, 992115946599792610768672500, -790370708270219620781737500, 345821892003106984030190625, -64041091111686478524109375))
+    u10 = 1 / 88580102706155225088000 * evalpoly(p2, (9745329584487361980740625, -1230031256571145165088463750, 27299183373230345667273718125, -246750339886026017414509498824, 1177120360439828012193658602930, -3327704366990695147540934069220, 5876803711285273203043452095250, -6564241639632418015173104205000, 4513386761946134740461797128125, -1745632061522350031610173343750, 290938676920391671935028890625))
+    return evalpoly(-p/v, (u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10))
+end
+
+
+    u0 = one(x)
+    u1 = p / 24 * (3 - 5*p^2) * -1 / v
+    u2 = p^2 / 1152 * (81 - 462*p^2 + 385*p^4) / v^2
+    u3 = p^3 / 414720 * (30375 - 369603 * p^2 + 765765*p^4 - 425425*p^6) * -1 / v^3
+    u4 = p^4 / 39813120 * (4465125 - 94121676*p^2 + 349922430*p^4 - 446185740*p^6 + 185910725*p^8) / v^4
+    u5 = p^5 / 6688604160 * (-188699385875*p^10 + 566098157625*p^8 - 614135872350*p^6 + 284499769554*p^4 - 49286948607*p^2 + 1519035525) * -1 / v^5
+    u6 = p^6 / 4815794995200 * (1023694168371875*p^12 - 3685299006138750*p^10 + 5104696716244125*p^8 - 3369032068261860*p^6 + 1050760774457901*p^4 - 127577298354750*p^2 + 2757049477875) * 1 / v^6
+    u7 = p^7 / 115579079884800 * (-221849150488590625*p^14 + 931766432052080625*p^12 - 1570320948552481125*p^10 + 1347119637570231525*p^8 - 613221795981706275*p^6 + 138799253740521843*p^4 - 12493049053044375*p^2 + 199689155040375) * -1 / v^7
+    u8 = p^8 / 22191183337881600 * (448357133137441653125*p^16 - 2152114239059719935000*p^14 + 4272845805510421639500*p^12 - 4513690624987320777000*p^10 + 2711772922412520971550*p^8 - 914113758588905038248*p^6 + 157768535329832893644*p^4 - 10960565081605263000*p^2 + 134790179652253125) * 1 / v^8
+    u9 = p^9 / 263631258054033408000 * (-64041091111686478524109375*p^18 + 345821892003106984030190625*p^16 - 790370708270219620781737500*p^14 + 992115946599792610768672500*p^12 - 741743213039573443221773250*p^10 + 334380732677827878090447630*p^8 - 87432034049652400520788332*p^6 + 11921080954211358275362500*p^4 - 659033454841709672064375*p^2 + 6427469716717690265625) * -1 / v^9
+    u10 = p^10 / 88580102706155225088000 * (290938676920391671935028890625*p^20 - 1745632061522350031610173343750*p^18 + 4513386761946134740461797128125*p^16 - 6564241639632418015173104205000*p^14 + 5876803711285273203043452095250*p^12 - 3327704366990695147540934069220*p^10 + 1177120360439828012193658602930*p^8 - 246750339886026017414509498824*p^6 + 27299183373230345667273718125*p^4 - 1230031256571145165088463750*p^2 + 9745329584487361980740625) * 1 / v^10
+    u11 = p^11 / 27636992044320430227456000 * (-1363312191078326248171914924296875*p^22 + 8997860461116953237934638500359375*p^20 - 25963913760458280822131901737878125*p^18 + 42936745153513007436411401865860625*p^16 - 44801790321820682384740638703320750*p^14 + 30589806122850866110941936529264950*p^12 - 13704902022868787041097596217578170*p^10
+        + 3926191452593448964331218647028194*p^8 - 676389476843440422173605288596087*p^6 + 62011003282542082472252466220875*p^4 - 2321657500166464779660536015625*p^2 + 15237265774872558064250728125) * -1 / v^11
+    u12 = p^12 / 39797268543821419527536640000 * (32426380464797989812768996474401171875*p^24 - 233469939346545526651936774615688437500*p^22 + 743739612850105971846901081692858843750*p^20 - 1378260730939829908037976894025628887500*p^18 + 1642838631056253395899341314188134178125*p^16 - 1314368459332124683504418467809129387000*p^14
+        + 714528665351965363868467348116538170900*p^12 - 261201165596865827608687437905740780920*p^10 + 62055079517573388459132793029571876461*p^8 - 8958590476947726766450604043798559500*p^6 + 692277766674325563109617997687563750*p^4 - 21882222767154197351962677311437500*p^2 + 120907703923613748239829527671875) * 1 / v^12
+    u13 = p^13 / 955134445051714068660879360000 * (-14020668045586884672121117629431460546875*p^26 + 109361210755577700442544717509565392265625*p^24 - 381190503845282445953419057314665534156250*p^22 + 782463969315283937856703223178540650343750*p^20
+        - 1049095945162229046324321461816274931715625*p^18 + 962926533925253374359704288384340809260875*p^16 - 616410216242554698436702353237166008656700*p^14 + 274983827478138958623041508409195988431140*p^12 - 83924867223075156862785921508524155665245*p^10
+        + 16843538631795795357786827345307534156063*p^8 - 2069933923586966756183324291232117362250*p^6 + 136735019134677724428035696765082813750*p^4 - 3698121486504259988094897605296209375*p^2 + 17438611142828905996129258798828125) * -1 / v^13
+    u14 = p^14 / 45846453362482275295722209280000 * (13133360053559028970728309756597440972265625*p^28 - 110320224449895843354117801955418504167031250*p^26 + 417630985812040040477569028872405152769921875*p^24
+        - 940627071986145750405920450097257805227812500*p^22 + 1401302601668131482630653233972052729323190625*p^20 - 1451823699927947602004297385351260623500372750*p^18 + 1070439683260179398514645952824098811025619475*p^16
+        - 564850830044980230366140582063618983657685400*p^14 + 211477117385619365164298957821904115470535555*p^12 - 54857705817658080981995319669299096598482382*p^10 + 9440449669103391509091075981237243128469201*p^8
+        - 1000503839668383458999731491914516564625300*p^6 + 57170953417612444837142230812990944671875*p^4 - 1338184074771428116079233795614103631250*p^2 + 5448320367052402487647812713291015625) * 1 / v^14
+    u15 = p^15 / 9820310310243703368343697227776000000 * (- 59115551939078562227317999668652486368337724609375*p^30 + 532039967451707060045861997017872377315039521484375*p^28 - 2173722139119126857509156976742601742357422740234375*p^26
+        + 5329871927856528282113994744458999865006055974609375*p^24 - 8735135969643867540297524795790262235822823374296875*p^22 + 10085018700249896522602267572484630409640764997271875*p^20 - 8420533422834140468835467666391400380550043688871875*p^18
+        + 5136561256208409671660362298619778869859994724706875*p^16 - 2284251621937242886581917667066122422330060024456125*p^14 + 730367145705123976114617970888707594104468177381925*p^12 - 163359140754958502896104062604202934925448173291477*p^10
+        + 24403480234538299231733883413666768614198435948125*p^8 - 2254933495791765108580529087615802250458013685625*p^6 + 112597271053778779753048514469995937998172890625*p^4 - 2303431987527333128955769182299845911305390625*p^2 + 8178936810213560828419581728001773291015625) * -1 / v^15
+
+=#
diff --git a/src/besseli.jl b/src/besseli.jl
@@ -4,6 +4,9 @@
 #    Scaled modified Bessel functions of the first kind of order zero and one
 #                       besseli0x, besselix
 #
+#    (Scaled) Modified Bessel functions of the first kind of order nu
+#                       besseli, besselix
+#
 #    Calculation of besseli0 is done in two branches using polynomial approximations [1]
 #
 #    Branch 1: x < 7.75 
@@ -35,11 +38,18 @@
 #
 #    Horner's scheme is then used to evaluate all polynomials.
 #    ArbNumerics.jl is used as the reference bessel implementations with 75 digits.
+#
+#    Calculation of besseli and besselkx can be done with downward recursion starting with
+#    besseli_{nu+1} and besseli_{nu}. Higher orders are determined by a uniform asymptotic
+#    expansion similar to besselk (see notes there) using Equation 10.41.3 [3].
+#
 # 
 # [1] "Rational Approximations for the Modified Bessel Function of the First Kind 
 #     - I0(x) for Computations with Double Precision" by Pavel Holoborodko     
 # [2] "Rational Approximations for the Modified Bessel Function of the First Kind 
-#     - I1(x) for Computations with Double Precision" by Pavel Holoborodko        
+#     - I1(x) for Computations with Double Precision" by Pavel Holoborodko
+# [3] https://dlmf.nist.gov/10.41
+
 """
     besseli0(x::T) where T <: Union{Float32, Float64}
 
@@ -56,6 +66,7 @@ function besseli0(x::T) where T <: Union{Float32, Float64}
         return a * s
     end
 end
+
 """
     besseli0x(x::T) where T <: Union{Float32, Float64}
 
@@ -73,6 +84,7 @@ function besseli0x(x::T) where T <: Union{Float32, Float64}
         return evalpoly(inv(x), besseli0_large_coefs(T)) / sqrt(x)
     end
 end
+
 """
     besseli1(x::T) where T <: Union{Float32, Float64}
 
@@ -94,6 +106,7 @@ function besseli1(x::T) where T <: Union{Float32, Float64}
     end
     return z
 end
+
 """
     besseli1x(x::T) where T <: Union{Float32, Float64}
 
@@ -116,3 +129,67 @@ function besseli1x(x::T) where T <: Union{Float32, Float64}
     end
     return z
 end
+
+"""
+    besseli(nu, x::T) where T <: Union{Float32, Float64}
+
+Modified Bessel function of the first kind of order nu, ``I_{nu}(x)``.
+Nu must be real.
+"""
+function besseli(nu, x::T) where T <: Union{Float32, Float64, BigFloat}
+    nu == 0 && return besseli0(x)
+    nu == 1 && return besseli1(x)
+
+    branch = 30
+    if nu < branch
+        inp1 = besseli_large_orders(branch + 1, x)
+        in = besseli_large_orders(branch, x)
+        return down_recurrence(x, in, inp1, nu, branch)
+    else
+        return besseli_large_orders(nu, x)
+    end
+end
+
+"""
+    besselix(nu, x::T) where T <: Union{Float32, Float64}
+
+Scaled modified Bessel function of the first kind of order nu, ``I_{nu}(x)*e^{-x}``.
+Nu must be real.
+"""
+function besselix(nu, x::T) where T <: Union{Float32, Float64, BigFloat}
+    nu == 0 && return besseli0x(x)
+    nu == 1 && return besseli1x(x)
+
+    branch = 30
+    if nu < branch
+        inp1 = besseli_large_orders_scaled(branch + 1, x)
+        in = besseli_large_orders_scaled(branch, x)
+        return down_recurrence(x, in, inp1, nu, branch)
+    else
+        return besseli_large_orders_scaled(nu, x)
+    end
+end
+function besseli_large_orders(v, x::T) where T <: Union{Float32, Float64, BigFloat}
+    S = promote_type(T, Float64)
+    x = S(x)
+    z = x / v
+    zs = hypot(1, z)
+    n = zs + log(z) - log1p(zs)
+    coef = SQ1O2PI(S) * sqrt(inv(v)) * exp(v*n) / sqrt(zs)
+    p = inv(zs)
+    p2  = v^2/fma(max(v,x), max(v,x), min(v,x)^2)
+
+    return T(coef*Uk_poly_In(p, v, p2, T))
+end
+function besseli_large_orders_scaled(v, x::T) where T <: Union{Float32, Float64, BigFloat}
+    S = promote_type(T, Float64)
+    x = S(x)
+    z = x / v
+    zs = hypot(1, z)
+    n = zs + log(z) - log1p(zs)
+    coef = SQ1O2PI(S) * sqrt(inv(v)) * exp(v*n - x) / sqrt(zs)
+    p = inv(zs)
+    p2  = v^2/fma(max(v,x), max(v,x), min(v,x)^2)
+
+    return T(coef*Uk_poly_In(p, v, p2, T))
+end
diff --git a/src/besselk.jl b/src/besselk.jl
@@ -41,20 +41,36 @@
 #                    K_{nu+1} = (2*nu/x)*K_{nu} + K_{nu-1}
 #
 #    When nu is large, a large amount of recurrence is necesary.
-#    At this point as nu -> Inf it is more efficient to use a uniform expansion.
-#    The boundary of the expansion depends on the accuracy required.
-#    For more information see [4]. This approach is not yet implemented, so recurrence
-#    is used for all values of nu.
-#
-# 
+#    We consider uniform asymptotic expansion for large orders to more efficiently
+#    compute besselk(nu, x) when nu is larger than 100 (Let's double check this cutoff)
+#    The boundary should be carefully determined for accuracy and machine roundoff.
+#    We use 10.41.4 from the Digital Library of Math Functions [5].
+#    This is also 9.7.8 in Abramowitz and Stegun [6].
+#    K_{nu}(nu*z) = sqrt(pi / 2nu) *exp(-nu*n)/(1+z^2)^1/4 * sum((-1^k)U_k(p) /nu^k)) for k=0 -> infty
+#    The U polynomials are the most tricky. They are listed up to order 4 in Table 9.39
+#    of [6]. For Float32, >=4 U polynomials are usually necessary. For Float64 values,
+#    >= 8 orders are needed. However, this largely depends on the cutoff of order you need.
+#    For moderatelly sized orders (nu=50), might need 11-12 orders to reach good enough accuracy
+#    in double precision. 
+#
+#    However, calculation of these higher order U polynomials are tedious. These have been hand
+#    calculated and somewhat crosschecked with symbolic math. There could be errors. They are listed
+#    in src/U_polynomials.jl as a reference as higher orders are impossible to find while being needed for any meaningfully accurate calculation.
+#    For large orders these formulas will converge much faster than using upward recurrence.
+#
+#    
 # [1] "Rational Approximations for the Modified Bessel Function of the Second Kind 
 #     - K0(x) for Computations with Double Precision" by Pavel Holoborodko     
 # [2] "Rational Approximations for the Modified Bessel Function of the Second Kind 
 #     - K1(x) for Computations with Double Precision" by Pavel Holoborodko
 # [3] https://github.com/boostorg/math/tree/develop/include/boost/math/special_functions/detail
-# [4] "Computation of Bessel Functions of Complex Argument and Large ORder" by Donald E. Amos
+# [4] "Computation of Bessel Functions of Complex Argument and Large Order" by Donald E. Amos
 #      Sandia National Laboratories
+# [5] https://dlmf.nist.gov/10.41
+# [6] Abramowitz, Milton, and Irene A. Stegun, eds. Handbook of mathematical functions with formulas, graphs, and mathematical tables. 
+#     Vol. 55. US Government printing office, 1964.
 #
+
 """
     besselk0(x::T) where T <: Union{Float32, Float64}
 
@@ -130,46 +146,61 @@ function besselk1x(x::T) where T <: Union{Float32, Float64}
     end
 end
 #=
-Recurrence is used for all values of nu. If besselk0(x) or besselk1(0) is equal to zero
+If besselk0(x) or besselk1(0) is equal to zero
 this will underflow and always return zero even if besselk(x, nu)
 is larger than the smallest representing floating point value.
 In other words, for large values of x and small to moderate values of nu,
 this routine will underflow to zero.
 For small to medium values of x and large values of nu this will overflow and return Inf.
-
-In the future, a more efficient algorithm for large nu should be incorporated.
 =#
 """
     besselk(x::T) where T <: Union{Float32, Float64}
 
 Modified Bessel function of the second kind of order nu, ``K_{nu}(x)``.
 """
-function besselk(nu::Int, x::T) where T <: Union{Float32, Float64}
-    return three_term_recurrence(x, besselk0(x), besselk1(x), nu)
+function besselk(nu, x::T) where T <: Union{Float32, Float64, BigFloat}
+    T == Float32 ? branch = 20 : branch = 50
+    if nu < branch
+        return up_recurrence(x, besselk0(x), besselk1(x), nu)
+    else
+        return besselk_large_orders(nu, x)
+    end
 end
+
 """
     besselk(x::T) where T <: Union{Float32, Float64}
 
 Scaled modified Bessel function of the second kind of order nu, ``K_{nu}(x)*e^{x}``.
 """
 function besselkx(nu::Int, x::T) where T <: Union{Float32, Float64}
-    return three_term_recurrence(x, besselk0x(x), besselk1x(x), nu)
+    T == Float32 ? branch = 20 : branch = 50
+    if nu < branch
+        return up_recurrence(x, besselk0x(x), besselk1x(x), nu)
+    else
+        return besselk_large_orders_scaled(nu, x)
+    end
 end
+function besselk_large_orders(v, x::T) where T <: Union{Float32, Float64, BigFloat}
+    S = promote_type(T, Float64)
+    x = S(x)
+    z = x / v
+    zs = hypot(1, z)
+    n = zs + log(z) - log1p(zs)
+    coef = SQPIO2(S) * sqrt(inv(v)) * exp(-v*n) / sqrt(zs)
+    p = inv(zs)
+    p2  = v^2/fma(max(v,x), max(v,x), min(v,x)^2)
 
-@inline function three_term_recurrence(x, k0, k1, nu)
-    nu == 0 && return k0
-    nu == 1 && return k1
-
-    # this prevents us from looping through large values of nu when the loop will always return zero
-    (iszero(k0) || iszero(k1)) && return zero(x) 
-
-    k2 = k0
-    x2 = 2 / x
-    for n in 1:nu-1
-        a = x2 * n
-        k2 = muladd(a, k1, k0)
-        k0 = k1
-        k1 = k2
-    end
-    return k2
+    return T(coef*Uk_poly_Kn(p, v, p2, T))
 end
+function besselk_large_orders_scaled(v, x::T) where T <: Union{Float32, Float64, BigFloat}
+    S = promote_type(T, Float64)
+    x = S(x)
+    z = x / v
+    zs = hypot(1, z)
+    n = zs + log(z) - log1p(zs)
+    coef = SQPIO2(S) * sqrt(inv(v)) * exp(-v*n + x) / sqrt(zs)
+    p = inv(zs)
+    p2  = v^2/fma(max(v,x), max(v,x), min(v,x)^2)
+
+    return T(coef*Uk_poly_Kn(p, v, p2, T))
+end
diff --git a/src/hankel.jl b/src/hankel.jl
@@ -1,3 +1,4 @@
+#= Aren't tested yet
 function besselh(nu::Float64, k::Integer, x::AbstractFloat)
     if k == 1
         return complex(besselj(nu, x), bessely(nu, x))
@@ -10,3 +11,4 @@ end
 
 hankelh1(nu, z) = besselh(nu, 1, z)
 hankelh2(nu, z) = besselh(nu, 2, z)
+=#