https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/188224
libclc: Update acosh This was originally ported from rocm device libs in ca4d382e119e1389c83dbb07d9ca0085e88b2944. Merge in more recent changes. Remove unused ep_log >From 217a3ef1c76d32892bce746895564995d1ae9674 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Sat, 21 Mar 2026 21:44:11 +0100 Subject: [PATCH 1/2] libclc: Update acosh This was originally ported from rocm device libs in ca4d382e119e1389c83dbb07d9ca0085e88b2944. Merge in more recent changes. --- libclc/clc/lib/generic/math/clc_acosh.cl | 15 +-- libclc/clc/lib/generic/math/clc_acosh.inc | 114 +++++----------------- 2 files changed, 29 insertions(+), 100 deletions(-) diff --git a/libclc/clc/lib/generic/math/clc_acosh.cl b/libclc/clc/lib/generic/math/clc_acosh.cl index cc73b76dd32f4..7eb505e4eca45 100644 --- a/libclc/clc/lib/generic/math/clc_acosh.cl +++ b/libclc/clc/lib/generic/math/clc_acosh.cl @@ -5,20 +5,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - #include "clc/clc_convert.h" #include "clc/float/definitions.h" -#include "clc/internal/clc.h" -#include "clc/math/clc_ep_log.h" -#include "clc/math/clc_fabs.h" -#include "clc/math/clc_fma.h" -#include "clc/math/clc_log1p.h" +#include "clc/math/clc_acosh.h" +#include "clc/math/clc_ep.h" +#include "clc/math/clc_log2_fast.h" #include "clc/math/clc_mad.h" -#include "clc/math/clc_sqrt.h" -#include "clc/math/math.h" -#include "clc/relational/clc_isinf.h" -#include "clc/relational/clc_isnan.h" -#include "clc/relational/clc_select.h" +#include "clc/math/clc_sqrt_fast.h" #define __CLC_BODY "clc_acosh.inc" #include "clc/math/gentype.inc" diff --git a/libclc/clc/lib/generic/math/clc_acosh.inc b/libclc/clc/lib/generic/math/clc_acosh.inc index 2f4a58742b8e3..02e623a650fc6 100644 --- a/libclc/clc/lib/generic/math/clc_acosh.inc +++ b/libclc/clc/lib/generic/math/clc_acosh.inc @@ -8,104 +8,40 @@ #if __CLC_FPSIZE == 32 -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_acosh(__CLC_GENTYPE x) { - __CLC_UINTN ux = __CLC_AS_UINTN(x); - - // Arguments greater than 1/sqrt(epsilon) in magnitude are approximated by - // acosh(x) = ln(2) + ln(x) - // For 2.0 <= x <= 1/sqrt(epsilon) the approximation is: - // acosh(x) = ln(x + sqrt(x * x - 1)) - __CLC_INTN high = ux > 0x46000000U; - __CLC_INTN med = ux > 0x40000000U; - - __CLC_GENTYPE w = x - 1.0f; - __CLC_GENTYPE s = w * w + 2.0f * w; - __CLC_GENTYPE t = x * x - 1.0f; - __CLC_GENTYPE r = __clc_sqrt(med ? t : s) + (med ? x : w); - __CLC_GENTYPE v = (high ? x : r) - (med ? 1.0f : 0.0f); - __CLC_GENTYPE z = __clc_log1p(v) + (high ? 0x1.62e430p-1f : 0.0f); - - z = __clc_select(z, x, __clc_isnan(x) || __clc_isinf(x)); - z = x < 1.0f ? __CLC_GENTYPE_NAN : z; - - return z; +_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_FLOATN __clc_acosh(__CLC_FLOATN x) { + __CLC_INTN b = x >= 0x1.0p+64f; + __CLC_FLOATN s = b ? 0x1.0p-64f : 1.0f; + __CLC_FLOATN sx = x * s; + __CLC_EP_PAIR a = + __clc_ep_add(sx, __clc_ep_sqrt(__clc_ep_sub(__clc_ep_sqr(sx), s * s))); + __CLC_FLOATN z = __clc_ep_ln_hi(a, b ? 64 : 0); + + z = x == __CLC_GENTYPE_INF ? x : z; + return x < 1.0f ? FLT_NAN : z; } #elif __CLC_FPSIZE == 64 -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_acosh(__CLC_GENTYPE x) { - // 1/sqrt(eps) = 9.49062656242515593767e+07 - const __CLC_GENTYPE recrteps = 0x1.6a09e667f3bcdp+26; - // log2_lead and log2_tail sum to an extra-precise version of log(2) - const __CLC_GENTYPE log2_lead = 0x1.62e42ep-1; - const __CLC_GENTYPE log2_tail = 0x1.efa39ef35793cp-25; - - // Handle x >= 128 here - __CLC_LONGN xlarge = x > recrteps; - __CLC_GENTYPE r = x + __clc_sqrt(__clc_fma(x, x, -1.0)); - r = xlarge ? x : r; - - __CLC_INTN xexp; - __CLC_GENTYPE r1, r2; - __clc_ep_log(r, &xexp, &r1, &r2); - - __CLC_GENTYPE dxexp = __CLC_CONVERT_GENTYPE( - __CLC_CONVERT_LONGN(xexp) + (xlarge ? (__CLC_LONGN)1 : (__CLC_LONGN)0)); - r1 = __clc_fma(dxexp, log2_lead, r1); - r2 = __clc_fma(dxexp, log2_tail, r2); - - __CLC_GENTYPE ret1 = r1 + r2; - - // Handle 1 < x < 128 here - // We compute the value - // t = x - 1.0 + sqrt(2.0*(x - 1.0) + (x - 1.0)*(x - 1.0)) - // using simulated quad precision. - __CLC_GENTYPE t = x - 1.0; - __CLC_GENTYPE u1 = t * 2.0; +_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_DOUBLEN __clc_acosh(__CLC_DOUBLEN x) { + __CLC_LONGN b = x >= 0x1.0p+512; + __CLC_DOUBLEN s = b ? 0x1.0p-512 : 1.0; + __CLC_DOUBLEN sx = x * s; + __CLC_EP_PAIR a = + __clc_ep_add(sx, __clc_ep_sqrt(__clc_ep_sub(__clc_ep_sqr(sx), s * s))); + __CLC_DOUBLEN z = __clc_ep_ln_hi(a, __CLC_CONVERT_INTN(b) ? 512 : 0); - // (t,0) * (t,0) -> (v1, v2) - __CLC_GENTYPE v1 = t * t; - __CLC_GENTYPE v2 = __clc_fma(t, t, -v1); - - // (u1,0) + (v1,v2) -> (w1,w2) - r = u1 + v1; - __CLC_GENTYPE s = (((u1 - r) + v1) + v2); - __CLC_GENTYPE w1 = r + s; - __CLC_GENTYPE w2 = (r - w1) + s; - - // sqrt(w1,w2) -> (u1,u2) - __CLC_GENTYPE p1 = __clc_sqrt(w1); - __CLC_GENTYPE a1 = p1 * p1; - __CLC_GENTYPE a2 = __clc_fma(p1, p1, -a1); - __CLC_GENTYPE temp = (((w1 - a1) - a2) + w2); - __CLC_GENTYPE p2 = MATH_DIVIDE(temp * 0.5, p1); - u1 = p1 + p2; - __CLC_GENTYPE u2 = (p1 - u1) + p2; - - // (u1,u2) + (t,0) -> (r1,r2) - r = u1 + t; - s = ((u1 - r) + t) + u2; - // r1 = r + s; - // r2 = (r - r1) + s; - // t = r1 + r2; - t = r + s; - - // For arguments 1.13 <= x <= 1.5 the log1p function is good enough - __CLC_GENTYPE ret2 = __clc_log1p(t); - - __CLC_GENTYPE ret = x >= 128.0 ? ret1 : ret2; - - ret = (__clc_isinf(x) || __clc_isnan(x)) ? x : ret; - ret = x == 1.0 ? 0.0 : ret; - ret = x < 1.0 ? __CLC_GENTYPE_NAN : ret; - - return ret; + z = x == __CLC_GENTYPE_INF ? x : z; + return x < 1.0 ? DBL_NAN : z; } #elif __CLC_FPSIZE == 16 -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_acosh(__CLC_GENTYPE x) { - return __CLC_CONVERT_GENTYPE(__clc_acosh(__CLC_CONVERT_FLOATN(x))); +_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_HALFN __clc_acosh(__CLC_HALFN hx) { + __CLC_FLOATN x = __CLC_CONVERT_FLOATN(hx); + __CLC_FLOATN t = x + __clc_sqrt_fast(__clc_mad(x, x, -1.0f)); + __CLC_HALFN ret = __CLC_CONVERT_HALFN(__clc_log2_fast(t) * 0x1.62e430p-1f); + + return hx < 1.0h ? HALF_NAN : ret; } #endif >From c7b8f25ea7c7f5a85ee64aca227aa490d77f4826 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Mon, 23 Mar 2026 19:25:01 +0100 Subject: [PATCH 2/2] Remove unused ep_log --- libclc/clc/include/clc/math/clc_ep_log.h | 15 ---- .../clc/include/clc/math/clc_ep_log_decl.inc | 16 ---- libclc/clc/lib/generic/CMakeLists.txt | 1 - libclc/clc/lib/generic/math/clc_ep_log.cl | 23 ------ libclc/clc/lib/generic/math/clc_ep_log.inc | 81 ------------------- 5 files changed, 136 deletions(-) delete mode 100644 libclc/clc/include/clc/math/clc_ep_log.h delete mode 100644 libclc/clc/include/clc/math/clc_ep_log_decl.inc delete mode 100644 libclc/clc/lib/generic/math/clc_ep_log.cl delete mode 100644 libclc/clc/lib/generic/math/clc_ep_log.inc diff --git a/libclc/clc/include/clc/math/clc_ep_log.h b/libclc/clc/include/clc/math/clc_ep_log.h deleted file mode 100644 index 8ac9546a844ed..0000000000000 --- a/libclc/clc/include/clc/math/clc_ep_log.h +++ /dev/null @@ -1,15 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef __CLC_MATH_CLC_EP_LOG_H__ -#define __CLC_MATH_CLC_EP_LOG_H__ - -#define __CLC_BODY "clc/math/clc_ep_log_decl.inc" -#include "clc/math/gentype.inc" - -#endif // __CLC_MATH_CLC_EP_LOG_H__ diff --git a/libclc/clc/include/clc/math/clc_ep_log_decl.inc b/libclc/clc/include/clc/math/clc_ep_log_decl.inc deleted file mode 100644 index a255a6a453074..0000000000000 --- a/libclc/clc/include/clc/math/clc_ep_log_decl.inc +++ /dev/null @@ -1,16 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#if __CLC_FPSIZE == 64 - -_CLC_DECL _CLC_OVERLOAD void __clc_ep_log(__CLC_GENTYPE x, - private __CLC_INTN *xexp, - private __CLC_GENTYPE *r1, - private __CLC_GENTYPE *r2); - -#endif diff --git a/libclc/clc/lib/generic/CMakeLists.txt b/libclc/clc/lib/generic/CMakeLists.txt index 8ad008a20bce3..03f4aa3e55a0a 100644 --- a/libclc/clc/lib/generic/CMakeLists.txt +++ b/libclc/clc/lib/generic/CMakeLists.txt @@ -77,7 +77,6 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES math/clc_div_cr.cl math/clc_div_fast.cl math/clc_ep.cl - math/clc_ep_log.cl math/clc_erf.cl math/clc_erfc.cl math/clc_exp.cl diff --git a/libclc/clc/lib/generic/math/clc_ep_log.cl b/libclc/clc/lib/generic/math/clc_ep_log.cl deleted file mode 100644 index 2d228a0dbf193..0000000000000 --- a/libclc/clc/lib/generic/math/clc_ep_log.cl +++ /dev/null @@ -1,23 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifdef cl_khr_fp64 - -#include "clc/clc_convert.h" -#include "clc/internal/clc.h" -#include "clc/math/clc_ep_log.h" -#include "clc/math/clc_fma.h" -#include "clc/math/math.h" -#include "clc/math/tables.h" - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -#define __CLC_BODY "clc_ep_log.inc" -#include "clc/math/gentype.inc" - -#endif diff --git a/libclc/clc/lib/generic/math/clc_ep_log.inc b/libclc/clc/lib/generic/math/clc_ep_log.inc deleted file mode 100644 index d8cc886ffc745..0000000000000 --- a/libclc/clc/lib/generic/math/clc_ep_log.inc +++ /dev/null @@ -1,81 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Computes natural log(x). Algorithm based on: -// Ping-Tak Peter Tang -// "Table-driven implementation of the logarithm function in IEEE floating-point -// arithmetic" -// ACM Transactions on Mathematical Software (TOMS) Volume 16, Issue 4 (December -// 1990) -// -//===----------------------------------------------------------------------===// - -#if __CLC_FPSIZE == 64 - -#define LN0 8.33333333333317923934e-02 -#define LN1 1.25000000037717509602e-02 -#define LN2 2.23213998791944806202e-03 -#define LN3 4.34887777707614552256e-04 - -#define LF0 8.33333333333333593622e-02 -#define LF1 1.24999999978138668903e-02 -#define LF2 2.23219810758559851206e-03 - -_CLC_DEF _CLC_OVERLOAD void __clc_ep_log(__CLC_GENTYPE x, - private __CLC_INTN *xexp, - private __CLC_GENTYPE *r1, - private __CLC_GENTYPE *r2) { - __CLC_LONGN near_one = x >= 0x1.e0faap-1 && x <= 0x1.1082cp+0; - - __CLC_ULONGN ux = __CLC_AS_ULONGN(x); - __CLC_ULONGN uxs = - __CLC_AS_ULONGN(__CLC_AS_GENTYPE(0x03d0000000000000UL | ux) - 0x1.0p-962); - __CLC_LONGN c = ux < IMPBIT_DP64; - ux = c ? uxs : ux; - __CLC_INTN expadjust = - __CLC_CONVERT_INTN(c ? (__CLC_LONGN)60 : (__CLC_LONGN)0); - - // Store the exponent of x in xexp and put f into the range [0.5,1) - __CLC_INTN xexp1 = __CLC_CONVERT_INTN((ux >> EXPSHIFTBITS_DP64) & 0x7ff) - - EXPBIAS_DP64 - expadjust; - __CLC_GENTYPE f = __CLC_AS_GENTYPE(HALFEXPBITS_DP64 | (ux & MANTBITS_DP64)); - *xexp = __CLC_CONVERT_INTN(near_one) ? 0 : xexp1; - - __CLC_GENTYPE r = x - 1.0; - __CLC_GENTYPE u1 = MATH_DIVIDE(r, 2.0 + r); - __CLC_GENTYPE ru1 = -r * u1; - u1 = u1 + u1; - - __CLC_INTN index = __CLC_CONVERT_INTN(ux >> 45); - index = ((0x80 | (index & 0x7e)) >> 1) + (index & 0x1); - - __CLC_GENTYPE f1 = __CLC_CONVERT_GENTYPE(index) * 0x1.0p-7; - __CLC_GENTYPE f2 = f - f1; - __CLC_GENTYPE u2 = MATH_DIVIDE(f2, __clc_fma(0.5, f2, f1)); - - __CLC_GENTYPE z1 = __CLC_USE_TABLE(ln_tbl_lo, (index - 64)); - __CLC_GENTYPE q = __CLC_USE_TABLE(ln_tbl_hi, (index - 64)); - - z1 = near_one ? r : z1; - q = near_one ? 0.0 : q; - __CLC_GENTYPE u = near_one ? u1 : u2; - __CLC_GENTYPE v = u * u; - - __CLC_GENTYPE cc = near_one ? ru1 : u2; - - __CLC_GENTYPE z21 = - __clc_fma(v, __clc_fma(v, __clc_fma(v, LN3, LN2), LN1), LN0); - __CLC_GENTYPE z22 = __clc_fma(v, __clc_fma(v, LF2, LF1), LF0); - __CLC_GENTYPE z2 = near_one ? z21 : z22; - z2 = __clc_fma(u * v, z2, cc) + q; - - *r1 = z1; - *r2 = z2; -} - -#endif _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
