diff --git a/doc/sf/igamma.qbk b/doc/sf/igamma.qbk index 3895d64c31..1ce33d7a76 100644 --- a/doc/sf/igamma.qbk +++ b/doc/sf/igamma.qbk @@ -26,6 +26,12 @@ template BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma_q(T1 a, T2 z, const ``__Policy``&); + template + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma_p(T1 a, T2 z); + + template + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma_p(T1 a, T2 z, const ``__Policy``&); + template BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_lower(T1 a, T2 z); @@ -72,6 +78,15 @@ This function changes rapidly from 0 to 1 around the point z == a: [graph gamma_p] + template + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma_p(T1 a, T2 z); + + template + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma_p(T1 a, T2 z, const ``__Policy``&); + +Returns the natural log of the normalized lower incomplete gamma function +of a and z. + template BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q(T1 a, T2 z); diff --git a/include/boost/math/special_functions/gamma.hpp b/include/boost/math/special_functions/gamma.hpp index 458dc40a3d..a9c17da882 100644 --- a/include/boost/math/special_functions/gamma.hpp +++ b/include/boost/math/special_functions/gamma.hpp @@ -1290,7 +1290,6 @@ BOOST_MATH_GPU_ENABLED T incomplete_tgamma_large_x(const T& a, const T& x, const return result; } - // // Main incomplete gamma entry point, handles all four incomplete gamma's: // @@ -1813,6 +1812,46 @@ BOOST_MATH_GPU_ENABLED T lgamma_incomplete_imp(T a, T x, const Policy& pol) return log(gamma_q(a, x, pol)); } +// Calculate log of incomplete gamma function +template +BOOST_MATH_GPU_ENABLED T lgamma_incomplete_lower_imp(T a, T x, const Policy& pol) +{ + using namespace boost::math; // temporary until we're in the right namespace + + BOOST_MATH_STD_USING_CORE + + // Check for invalid inputs (a < 0 or x < 0) + constexpr auto function = "boost::math::lgamma_p<%1%>(%1%, %1%)"; + if(a <= 0) + return policies::raise_domain_error(function, "Argument a to the incomplete gamma function must be greater than zero (got a=%1%).", a, pol); + if(x < 0) + return policies::raise_domain_error(function, "Argument x to the incomplete gamma function must be >= 0 (got x=%1%).", x, pol); + + // Taken from conditions on Line 1709. There are also + // conditions on Line 1368, but didn't implement that one here. + // The second condition ensures floats do not return -inf for small + // values of x. + if (((a > 4 * x) && (a >= max_factorial::value)) || ((T(-0.4) / log(x) < a) && (x < T(1.0)))){ + return log(detail::lower_gamma_series(a, x, pol)) - log(a) + a * log(x) - x - lgamma(a, pol); + } + + // + // Can't do better than taking the log of P, but... + // + // Figure out whether we need P or Q, since if we calculate P and it's too close to unity + // we will lose precision in the result, selection logic here is extracted from gamma_incomplete_imp_final: + // + bool need_p = false; + if ((x < 1.1) && (x >= 0.5) && (x * 0.75f < a)) + need_p = true; + else if ((x < a) && (x >= 1.1)) + need_p = true; + + if (need_p) + return log(gamma_p(a, x, pol)); + return log1p(-gamma_q(a, x, pol), pol); +} + // // Ratios of two gamma functions: // @@ -2454,6 +2493,29 @@ BOOST_MATH_GPU_ENABLED inline tools::promote_args_t lgamma_q(T1 a, T2 z) { return lgamma_q(a, z, policies::policy<>()); } + +template +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t lgamma_p(T1 a, T2 z, const Policy& /* pol */) +{ + typedef tools::promote_args_t result_type; + typedef typename policies::evaluation::type value_type; + typedef typename policies::normalise< + Policy, + policies::promote_float, + policies::promote_double, + policies::discrete_quantile<>, + policies::assert_undefined<> >::type forwarding_policy; + + return policies::checked_narrowing_cast( + detail::lgamma_incomplete_lower_imp(static_cast(a), + static_cast(z), forwarding_policy()), "lgamma_p<%1%>(%1%, %1%)"); +} + +template +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t lgamma_p(T1 a, T2 z) +{ + return lgamma_p(a, z, policies::policy<>()); +} // // Regularised lower incomplete gamma: // diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp index ca16fc8d96..6c8394ddf9 100644 --- a/include/boost/math/special_functions/math_fwd.hpp +++ b/include/boost/math/special_functions/math_fwd.hpp @@ -567,6 +567,12 @@ namespace boost template BOOST_MATH_GPU_ENABLED tools::promote_args_t lgamma_q(RT1 a, RT2 z, const Policy&); + template + BOOST_MATH_GPU_ENABLED tools::promote_args_t lgamma_p(RT1 a, RT2 z); + + template + BOOST_MATH_GPU_ENABLED tools::promote_args_t lgamma_p(RT1 a, RT2 z, const Policy&); + template BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p(RT1 a, RT2 z); @@ -1525,6 +1531,9 @@ namespace boost \ template \ BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t lgamma_q(RT1 a, RT2 z){ return boost::math::lgamma_q(a, z, Policy()); }\ +\ + template \ + BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t lgamma_p(RT1 a, RT2 z){ return boost::math::lgamma_p(a, z, Policy()); }\ \ template \ BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t gamma_p(RT1 a, RT2 z){ return boost::math::gamma_p(a, z, Policy()); }\ diff --git a/test/compile_test/instantiate.hpp b/test/compile_test/instantiate.hpp index 1d186aff7f..96c8964866 100644 --- a/test/compile_test/instantiate.hpp +++ b/test/compile_test/instantiate.hpp @@ -264,6 +264,7 @@ void instantiate(RealType) boost::math::gamma_p(v1, v2); boost::math::gamma_q(v1, v2); boost::math::lgamma_q(v1, v2); + boost::math::lgamma_p(v1, v2); boost::math::gamma_p_inv(v1, v2); boost::math::gamma_q_inv(v1, v2); boost::math::gamma_p_inva(v1, v2); @@ -544,6 +545,7 @@ void instantiate(RealType) boost::math::gamma_p(v1 * 1, v2 + 0); boost::math::gamma_q(v1 * 1, v2 + 0); boost::math::lgamma_q(v1 * 1, v2 + 0); + boost::math::lgamma_p(v1 * 1, v2 + 0); boost::math::gamma_p_inv(v1 * 1, v2 + 0); boost::math::gamma_q_inv(v1 * 1, v2 + 0); boost::math::gamma_p_inva(v1 * 1, v2 + 0); @@ -796,6 +798,7 @@ void instantiate(RealType) boost::math::gamma_p(v1, v2, pol); boost::math::gamma_q(v1, v2, pol); boost::math::lgamma_q(v1, v2, pol); + boost::math::lgamma_p(v1, v2, pol); boost::math::gamma_p_inv(v1, v2, pol); boost::math::gamma_q_inv(v1, v2, pol); boost::math::gamma_p_inva(v1, v2, pol); @@ -1074,6 +1077,7 @@ void instantiate(RealType) test::gamma_p(v1, v2); test::gamma_q(v1, v2); test::lgamma_q(v1, v2); + test::lgamma_p(v1, v2); test::gamma_p_inv(v1, v2); test::gamma_q_inv(v1, v2); test::gamma_p_inva(v1, v2); @@ -1356,6 +1360,7 @@ void instantiate_mixed(RealType) boost::math::gamma_p(fr, lr); boost::math::gamma_q(i, s); boost::math::lgamma_q(i, s); + boost::math::lgamma_p(i, s); boost::math::gamma_q(fr, lr); boost::math::gamma_p_inv(i, fr); boost::math::gamma_q_inv(s, fr); @@ -1572,6 +1577,7 @@ void instantiate_mixed(RealType) boost::math::gamma_p(fr, lr, pol); boost::math::gamma_q(i, s, pol); boost::math::lgamma_q(i, s, pol); + boost::math::lgamma_p(i, s, pol); boost::math::gamma_q(fr, lr, pol); boost::math::gamma_p_inv(i, fr, pol); boost::math::gamma_q_inv(s, fr, pol); @@ -1784,8 +1790,10 @@ void instantiate_mixed(RealType) test::gamma_p(fr, lr); test::gamma_q(i, s); test::lgamma_q(i, s); + test::lgamma_p(i, s); test::gamma_q(fr, lr); test::lgamma_q(fr, lr); + test::lgamma_p(fr, lr); test::gamma_p_inv(i, fr); test::gamma_q_inv(s, fr); test::gamma_p_inva(i, lr); diff --git a/test/compile_test/sf_gamma_incl_test.cpp b/test/compile_test/sf_gamma_incl_test.cpp index 74ab85b2df..85045c857d 100644 --- a/test/compile_test/sf_gamma_incl_test.cpp +++ b/test/compile_test/sf_gamma_incl_test.cpp @@ -45,6 +45,12 @@ void compile_and_link_test() check_result(boost::math::lgamma_q(l, l)); #endif +check_result(boost::math::lgamma_p(f, f)); +check_result(boost::math::lgamma_p(d, d)); +#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS + check_result(boost::math::lgamma_p(l, l)); +#endif + check_result(boost::math::gamma_p_inv(f, f)); check_result(boost::math::gamma_p_inv(d, d)); #ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 4082057fa5..17ce1647b3 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -371,6 +371,8 @@ run test_gamma_p_inv_double.cu ; run test_gamma_p_inv_float.cu ; run test_lgamma_q_double.cu ; run test_lgamma_q_float.cu ; +run test_lgamma_p_double.cu ; +run test_lgamma_p_float.cu ; run test_log1p_double.cu ; run test_log1p_float.cu ; diff --git a/test/test_igamma.hpp b/test/test_igamma.hpp index 88f0bca174..74256fed25 100644 --- a/test/test_igamma.hpp +++ b/test/test_igamma.hpp @@ -263,7 +263,19 @@ void test_spots(T, const char* name = nullptr) BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(501.25), static_cast(2000)), static_cast(-810.2453406781655559126505101822969531699112391075198076300675402L), tolerance); BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(20), static_cast(0.25)), static_cast(-2.946458104491857816330873290969917497748067639461638294404e-31L), tolerance); BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(40), static_cast(0.75)), static_cast(-5.930604927955460343652485525435087275997461623988991819824e-54L), tolerance); -#if defined(__CYGWIN__) || defined(__MINGW32__) + + // + // Check that lgamma_q returns correct values with spot values calculated via wolframalpha log(P[a, x]) + // This is calculated using: N[Log[GammaRegularized[a,0, z]],64] + // + BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast(500), static_cast(10)), static_cast(-1470.017750815998931281954666549641187420649099004671023115157832L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast(100), static_cast(0.25)), static_cast(-502.6163334118978895536207514636026023439623265152862757105793000L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast(20), static_cast(10.25)), static_cast(-5.404004887981642339930593767572610169901594898478031307722239712L), tolerance); + // Small "a" produce larger errors + BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast(0.25), static_cast(100)), static_cast(-3.220751038854414755009496530271388459559061551701603447517040280e-46L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast(0.25), static_cast(10)), static_cast(-2.083032578160285760530530498275075010777428544413918699832758176e-6L), tolerance); + + #if defined(__CYGWIN__) || defined(__MINGW32__) T gcc_win_mul = 2; #else T gcc_win_mul = 1; @@ -287,6 +299,24 @@ void test_spots(T, const char* name = nullptr) BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(1200), static_cast(1250.25)), static_cast(-2.591934862117586205519309712218581885256650074210410262843591453L), tolerance * ((std::numeric_limits::max_digits10 >= 36 || std::is_same::value) ? 750 : (std::is_same::value ? 1 : 50))); // Test fails on ARM64 and s390x long doubles and real_concept types unless tolerance is adjusted BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(2200), static_cast(2249.75)), static_cast(-1.933779894897391651410597618307863427927461116308937004149240320L), tolerance * (std::is_floating_point::value ? 1 : 10)); BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(2200), static_cast(2250.25)), static_cast(-1.950346484067948344620463026377077515919992808640737320057812268L), tolerance * (std::is_same::value ? 1 : (std::is_floating_point::value ? 100 : 200))); + + // Long double and real_concept types need increased precision + T real_concept_tol = 1; + if (std::is_same::value || std::is_same::value){ + real_concept_tol = 3; + } + // Pair of tests that bisect the crossover condition in our code at double and then quad precision + // Oddly, the crossover condition is smaller for quad precision. This is because max_factorial is 100 + // for boost::multiprecision::cpp_bin_float_quad and 170 for doubles. + BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast(169.75), static_cast(0.75)), static_cast(-754.8681912874632573100058312311927462406154378562940316233389406L), tolerance * real_concept_tol); + BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast(170.25), static_cast(0.75)), static_cast(-757.5814133895304434271729579978676692688834086380018151200693572L), tolerance * real_concept_tol); + BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast(99.75), static_cast(0.75)), static_cast(-392.0259615581237826290999388631292473247947826682978959914359465L), tolerance * real_concept_tol); + BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast(100.25), static_cast(0.75)), static_cast(-394.4749200332583219473980963811639065003421270272773619742710832L), tolerance * real_concept_tol); + + // Check large a, x values. Precision just isn't great here. + BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast(1450.25), static_cast(1500.75)), static_cast(-0.09812447528127799786140178403478691390753413399549580160096975713L), tolerance * (std::is_same::value ? 16 : 1)); + BOOST_CHECK_CLOSE(::boost::math::lgamma_p(static_cast(2000), static_cast(1900)), static_cast(-4.448523733381445722945397105917814000790587922314824687065050805L), tolerance * gcc_win_mul * (std::is_same::value ? 8 : 1)); + // // Coverage: // @@ -302,6 +332,10 @@ void test_spots(T, const char* name = nullptr) BOOST_CHECK_THROW(boost::math::lgamma_q(static_cast(1), static_cast(-2)), std::domain_error); BOOST_CHECK_THROW(boost::math::lgamma_q(static_cast(0), static_cast(2)), std::domain_error); + BOOST_CHECK_THROW(boost::math::lgamma_p(static_cast(-1), static_cast(2)), std::domain_error); + BOOST_CHECK_THROW(boost::math::lgamma_p(static_cast(1), static_cast(-2)), std::domain_error); + BOOST_CHECK_THROW(boost::math::lgamma_p(static_cast(0), static_cast(2)), std::domain_error); + BOOST_CHECK_THROW(boost::math::gamma_p_derivative(static_cast(-1), static_cast(2)), std::domain_error); BOOST_CHECK_THROW(boost::math::gamma_p_derivative(static_cast(1), static_cast(-2)), std::domain_error); BOOST_CHECK_THROW(boost::math::gamma_p_derivative(static_cast(0), static_cast(2)), std::domain_error); @@ -317,6 +351,10 @@ void test_spots(T, const char* name = nullptr) BOOST_CHECK((boost::math::isnan)(boost::math::lgamma_q(static_cast(1), static_cast(-2)))); BOOST_CHECK((boost::math::isnan)(boost::math::lgamma_q(static_cast(0), static_cast(2)))); + BOOST_CHECK((boost::math::isnan)(boost::math::lgamma_p(static_cast(-1), static_cast(2)))); + BOOST_CHECK((boost::math::isnan)(boost::math::lgamma_p(static_cast(1), static_cast(-2)))); + BOOST_CHECK((boost::math::isnan)(boost::math::lgamma_p(static_cast(0), static_cast(2)))); + BOOST_CHECK((boost::math::isnan)(boost::math::gamma_p_derivative(static_cast(-1), static_cast(2)))); BOOST_CHECK((boost::math::isnan)(boost::math::gamma_p_derivative(static_cast(1), static_cast(-2)))); BOOST_CHECK((boost::math::isnan)(boost::math::gamma_p_derivative(static_cast(0), static_cast(2)))); diff --git a/test/test_lgamma_p_double.cu b/test/test_lgamma_p_double.cu new file mode 100644 index 0000000000..5638e01a21 --- /dev/null +++ b/test/test_lgamma_p_double.cu @@ -0,0 +1,102 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::lgamma_p(in[i], in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::lgamma_p(input_vector[i], input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_lgamma_p_float.cu b/test/test_lgamma_p_float.cu new file mode 100644 index 0000000000..2c0dd91df4 --- /dev/null +++ b/test/test_lgamma_p_float.cu @@ -0,0 +1,102 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::lgamma_p(in[i], in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::lgamma_p(input_vector[i], input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +}