diff --git a/doc/sf/igamma.qbk b/doc/sf/igamma.qbk index 4675928e63..3895d64c31 100644 --- a/doc/sf/igamma.qbk +++ b/doc/sf/igamma.qbk @@ -20,6 +20,12 @@ template BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q(T1 a, T2 z, const ``__Policy``&); + template + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma_q(T1 a, T2 z); + + template + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma_q(T1 a, T2 z, const ``__Policy``&); + template BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_lower(T1 a, T2 z); @@ -80,6 +86,15 @@ This function changes rapidly from 1 to 0 around the point z == a: [graph gamma_q] + template + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma_q(T1 a, T2 z); + + template + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma_q(T1 a, T2 z, const ``__Policy``&); + +Returns the natural log of the normalised upper incomplete gamma function +of a and z. + template BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_lower(T1 a, T2 z); @@ -263,6 +278,16 @@ large a and x the errors will still get you eventually, although this does delay the inevitable much longer than other methods. Use of /log(1+x)-x/ here is inspired by Temme (see references below). +The natural log of the normalised upper incomplete gamma function is computed +as expected except when the normalised upper incomplete gamma function +begins to underflow. This occurs approximately when + + ((x > 1000) || (x > log_max_value() - 10)) && (a + 50 < x) + +in which case an expansion, for large x, of the (non-normalised) upper +incomplete gamma function is used. The result is then normalised by subtracting +the log of the gamma function and adding /a log(x)-x-log(x)/. + [h4 References] * N. M. 
Temme, A Set of Algorithms for the Incomplete Gamma Functions, diff --git a/include/boost/math/special_functions/gamma.hpp b/include/boost/math/special_functions/gamma.hpp index 47b4f3d68d..458dc40a3d 100644 --- a/include/boost/math/special_functions/gamma.hpp +++ b/include/boost/math/special_functions/gamma.hpp @@ -1772,6 +1772,47 @@ BOOST_MATH_GPU_ENABLED T gamma_incomplete_imp(T a, T x, bool normalised, bool in return gamma_incomplete_imp_final(T(a), T(x), normalised, invert, pol, p_derivative); } +// Calculate the natural log of the normalised upper incomplete gamma function Q(a, x) +template +BOOST_MATH_GPU_ENABLED T lgamma_incomplete_imp(T a, T x, const Policy& pol) +{ + using namespace boost::math; // temporary until we're in the right namespace + + BOOST_MATH_STD_USING_CORE + + // Check for invalid inputs (a <= 0 or x < 0) + constexpr auto function = "boost::math::lgamma_q<%1%>(%1%, %1%)"; + if(a <= 0) + return policies::raise_domain_error(function, "Argument a to the incomplete gamma function must be greater than zero (got a=%1%).", a, pol); + if(x < 0) + return policies::raise_domain_error(function, "Argument x to the incomplete gamma function must be >= 0 (got x=%1%).", x, pol); + + if (((x > 1000) || (x > tools::log_max_value() - 10)) && (a + 50 < x)) + { + // + // Take the logarithmic version of the asymptotic expansion for large x: + // + return log(detail::incomplete_tgamma_large_x(a, x, pol)) + a * log(x) - x - lgamma(a, pol) - log(x); + } + // + // Can't do better than taking the log of Q, but... 
+ // + // Figure out whether we need P or Q, since if we calculate Q and it's too close to unity + // we will lose precision in the result. The selection logic here is extracted from gamma_incomplete_imp_final: + // + bool need_p = false; + if ((x < 0.5) && (T(-0.4) / log(x) < a)) + need_p = true; + else if ((x < 1.1) && (x >= 0.5) && (x * 0.75f < a)) + need_p = true; + else if ((x < a) && (x >= 1.1)) + need_p = true; + + if (need_p) + return log1p(-gamma_p(a, x, pol), pol); + return log(gamma_q(a, x, pol)); +} + // // Ratios of two gamma functions: // @@ -2390,6 +2431,29 @@ BOOST_MATH_GPU_ENABLED inline tools::promote_args_t { return gamma_q(a, z, policies::policy<>()); } + +template +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t lgamma_q(T1 a, T2 z, const Policy& /* pol */) +{ + typedef tools::promote_args_t result_type; + typedef typename policies::evaluation::type value_type; + typedef typename policies::normalise< + Policy, + policies::promote_float, + policies::promote_double, + policies::discrete_quantile<>, + policies::assert_undefined<> >::type forwarding_policy; + + return policies::checked_narrowing_cast( + detail::lgamma_incomplete_imp(static_cast(a), + static_cast(z), forwarding_policy()), "lgamma_q<%1%>(%1%, %1%)"); +} + +template +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t lgamma_q(T1 a, T2 z) +{ + return lgamma_q(a, z, policies::policy<>()); +} // // Regularised lower incomplete gamma: // diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 02dcea8382..4082057fa5 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -369,6 +369,8 @@ run test_gamma_p_derivative_double.cu ; run test_gamma_p_derivative_float.cu ; run test_gamma_p_inv_double.cu ; run test_gamma_p_inv_float.cu ; +run test_lgamma_q_double.cu ; +run test_lgamma_q_float.cu ; run test_log1p_double.cu ; run test_log1p_float.cu ; diff --git a/test/test_igamma.cpp b/test/test_igamma.cpp index ddc37f0759..b113eb064c 100644 --- a/test/test_igamma.cpp +++ b/test/test_igamma.cpp 
@@ -394,13 +394,13 @@ BOOST_AUTO_TEST_CASE( test_main ) BOOST_MATH_CONTROL_FP; #ifndef BOOST_MATH_BUGGY_LARGE_FLOAT_CONSTANTS - test_spots(0.0F); + test_spots(0.0F, "float"); #endif - test_spots(0.0); + test_spots(0.0, "double"); #ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS - test_spots(0.0L); + test_spots(0.0L, "long double"); #ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS - test_spots(boost::math::concepts::real_concept(0.1)); + test_spots(boost::math::concepts::real_concept(0.1), "real_concept"); #endif #endif diff --git a/test/test_igamma.hpp b/test/test_igamma.hpp index 3459f71d92..febe16f216 100644 --- a/test/test_igamma.hpp +++ b/test/test_igamma.hpp @@ -18,7 +18,7 @@ #include #include #include "functor.hpp" - +#include #include "handle_test_result.hpp" #include "table_type.hpp" @@ -141,8 +141,9 @@ void test_gamma(T, const char* name) } template -void test_spots(T) +void test_spots(T, const char* name = nullptr) { + std::cout << "Testing spot values with type " << (name ? name : "unknown") << std::endl; /* name defaults to nullptr, and streaming a null const char* is undefined behaviour, so substitute a placeholder */ // // basic sanity checks, tolerance is 10 epsilon expressed as a percentage: // @@ -256,6 +257,33 @@ void test_spots(T) BOOST_CHECK_EQUAL(::boost::math::gamma_q(static_cast(1770), static_cast(1e-12)), 1); BOOST_CHECK_EQUAL(::boost::math::gamma_p(static_cast(1770), static_cast(1e-12)), 0); // + // Check that lgamma_q returns correct values with spot values calculated via wolframalpha log(Q[a, x]) + // + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(5), static_cast(100)), static_cast(-84.71697591169848944613823640968965801339401810393519310714864307L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(22.5), static_cast(2000)), static_cast(-1883.489773203771543025750308264545743305089849873060383828767138L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(501.25), static_cast(2000)), static_cast(-810.2453406781655559126505101822969531699112391075198076300675402L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(20), 
static_cast(0.25)), static_cast(-2.946458104491857816330873290969917497748067639461638294404e-31L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(40), static_cast(0.75)), static_cast(-5.930604927955460343652485525435087275997461623988991819824e-54L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(50), static_cast(2)), static_cast(-5.214301903317168085381693412994550732094621576607843973832e-51L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(500), static_cast(10)), static_cast(-3.79666711621207197039397438773960431648625558027046365463e-639L), tolerance * 3); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(5), static_cast(1000)), static_cast(-975.5430287171020511929200293377669175923128826278957569928895945L), tolerance); + // Pairs of tests that bisect the crossover condition in our code at double and then quad precision: + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(10), static_cast(698.75)), static_cast(-652.5952453102824132865663191324423994628428404928732148525545721L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(10), static_cast(699.25)), static_cast(-653.0888168445921483147208556398158677077537551419551652934287016L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(10), static_cast(999.75)), static_cast(-950.3752463850600415679327136010171192193400042422096029239012176L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(10), static_cast(1000.25)), static_cast(-950.8707509166152482936275883547176592196662090187561681198668099L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(50), static_cast(698.75)), static_cast(-522.3277960730837166223131189587863209730608668858212533099139269L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(50), static_cast(699.25)), static_cast(-522.7927997457481265511084805522296021540768033975669071565674196L), tolerance); + 
BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(50), static_cast(999.75)), static_cast(-805.7977867938474339107474131612354353193501692041340771552419902L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(50), static_cast(1000.25)), static_cast(-806.2733124989172792095030711884568388681331032891850662521501582L), tolerance); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(800), static_cast(999.75)), static_cast(-24.33274293617739453303937714319703386675839030466670622049929011L), tolerance * 2); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(800), static_cast(1000.25)), static_cast(-24.43514173634027477093666725985191846106997808357863808910970142L), tolerance * (boost::math::tools::digits() > 54 ? 20 : 1)); + // Once we get large a,x then errors start to accumulate no matter what we do: + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(1200), static_cast(1249.75)), static_cast(-2.565496161584661216769813239648606145255794643472303927896044375L), tolerance * (std::is_floating_point::value ? 1 : 4)); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(1200), static_cast(1250.25)), static_cast(-2.591934862117586205519309712218581885256650074210410262843591453L), tolerance * ((std::numeric_limits::max_digits10 >= 36) ? 50 : (std::is_same::value ? 1 : 50))); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(2200), static_cast(2249.75)), static_cast(-1.933779894897391651410597618307863427927461116308937004149240320L), tolerance * (std::is_floating_point::value ? 1 : 10)); + BOOST_CHECK_CLOSE(::boost::math::lgamma_q(static_cast(2200), static_cast(2250.25)), static_cast(-1.950346484067948344620463026377077515919992808640737320057812268L), tolerance * (std::is_same::value ? 1 : (std::is_floating_point::value ? 
100 : 200))); + // // Coverage: // #ifndef BOOST_MATH_NO_EXCEPTIONS @@ -265,6 +293,11 @@ void test_spots(T) BOOST_CHECK_THROW(boost::math::gamma_q(static_cast(1), static_cast(-2)), std::domain_error); BOOST_CHECK_THROW(boost::math::gamma_p(static_cast(0), static_cast(2)), std::domain_error); BOOST_CHECK_THROW(boost::math::gamma_q(static_cast(0), static_cast(2)), std::domain_error); + + BOOST_CHECK_THROW(boost::math::lgamma_q(static_cast(-1), static_cast(2)), std::domain_error); + BOOST_CHECK_THROW(boost::math::lgamma_q(static_cast(1), static_cast(-2)), std::domain_error); + BOOST_CHECK_THROW(boost::math::lgamma_q(static_cast(0), static_cast(2)), std::domain_error); + BOOST_CHECK_THROW(boost::math::gamma_p_derivative(static_cast(-1), static_cast(2)), std::domain_error); BOOST_CHECK_THROW(boost::math::gamma_p_derivative(static_cast(1), static_cast(-2)), std::domain_error); BOOST_CHECK_THROW(boost::math::gamma_p_derivative(static_cast(0), static_cast(2)), std::domain_error); @@ -275,6 +308,11 @@ void test_spots(T) BOOST_CHECK((boost::math::isnan)(boost::math::gamma_q(static_cast(1), static_cast(-2)))); BOOST_CHECK((boost::math::isnan)(boost::math::gamma_p(static_cast(0), static_cast(2)))); BOOST_CHECK((boost::math::isnan)(boost::math::gamma_q(static_cast(0), static_cast(2)))); + + BOOST_CHECK((boost::math::isnan)(boost::math::lgamma_q(static_cast(-1), static_cast(2)))); + BOOST_CHECK((boost::math::isnan)(boost::math::lgamma_q(static_cast(1), static_cast(-2)))); + BOOST_CHECK((boost::math::isnan)(boost::math::lgamma_q(static_cast(0), static_cast(2)))); + BOOST_CHECK((boost::math::isnan)(boost::math::gamma_p_derivative(static_cast(-1), static_cast(2)))); BOOST_CHECK((boost::math::isnan)(boost::math::gamma_p_derivative(static_cast(1), static_cast(-2)))); BOOST_CHECK((boost::math::isnan)(boost::math::gamma_p_derivative(static_cast(0), static_cast(2)))); diff --git a/test/test_lgamma_q_double.cu b/test/test_lgamma_q_double.cu new file mode 100644 index 
0000000000..4326587abf --- /dev/null +++ b/test/test_lgamma_q_double.cu @@ -0,0 +1,102 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * Each thread computes out[i] = lgamma_q(in[i], in[i]) for one index i < numElements. + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::lgamma_q(in[i], in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the number of elements to be processed + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vector with rand()/RAND_MAX, i.e. values in [0, 1]. NOTE(review): an input of exactly 0 gives a == 0, which lgamma_q rejects as a domain error - confirm this cannot occur + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the lgamma_q test kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); 
+ + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Compute the same values on the host as a reference, timing the calculation + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::lgamma_q(input_vector[i], input_vector[i])); + double t = w.elapsed(); + // Fail if epsilon_difference between any device result and its host result exceeds 10 + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_lgamma_q_float.cu b/test/test_lgamma_q_float.cu new file mode 100644 index 0000000000..a38d8e13fe --- /dev/null +++ b/test/test_lgamma_q_float.cu @@ -0,0 +1,102 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * Each thread computes out[i] = lgamma_q(in[i], in[i]) for one index i < numElements. + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::lgamma_q(in[i], in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the number of elements to be processed + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vector with rand()/RAND_MAX, i.e. values in [0, 1]. NOTE(review): an input of exactly 0 gives a == 0, which lgamma_q rejects as a domain error - confirm this cannot occur + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the lgamma_q test kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Compute the same values on the host as a reference, timing the calculation + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::lgamma_q(input_vector[i], input_vector[i])); + double t = w.elapsed(); + // Fail if epsilon_difference between any device result and its host result exceeds 10 + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +}