Hi! In https://sourceware.org/pipermail/libc-alpha/2023-November/152819.html Florian Weimer raised concern that the type-generic stdbit.h macros currently being considered suffer from similar problem as old tgmath.h implementation, in particular that the macros expand during preprocessing their arguments multiple times and if one nests these stdbit.h type-generic macros several times, that can result in extremely large preprocessed source and long compile times, even when the argument is only actually evaluated once at runtime for side-effects.
As I'd strongly prefer not to add new builtins for all the 14 stdbit.h type-generic macros, I think it is better to build the macros from smaller building blocks. The following patch adds the first one. While one can say implement e.g. stdc_leading_zeros(value) macro as ((unsigned int) __builtin_clzg (value, __builtin_popcountg ((__typeof (value)) ~(__typeof (value)) 0))) that expands the argument 3 times, and even if it just used ((unsigned int) __builtin_clzg (value, __builtin_popcountg ((__typeof (value)) -1))) relying on 2-s complement, that is still twice. I'd prefer not to add optional 3rd argument to these, but given that the second argument if specified right now has to have signed int type, the following patch adds an exception that it allows 0ULL as a magic value for the argument to mean fill in the precision of the first argument. Ok for trunk if it passes bootstrap/regtest? 2023-11-18 Jakub Jelinek <ja...@redhat.com> PR c/111309 gcc/ * builtins.cc (fold_builtin_bit_query): If arg1 is 0ULL, use TYPE_PRECISION (arg0_type) instead of it. * fold-const-call.cc (fold_const_call_sss): Rename arg0_type argument to arg_type, add arg1_type argument, if for CLZ/CTZ second argument is unsigned long long, use TYPE_PRECISION (arg0_type). (fold_const_call_1): Pass also TREE_TYPE (arg1) to fold_const_call_sss. * doc/extend.texi (__builtin_clzg, __builtin_ctzg): Document behavior for second argument 0ULL. gcc/c-family/ * c-common.cc (check_builtin_function_arguments): If args[1] is 0ULL, use TYPE_PRECISION (TREE_TYPE (args[0])) instead of it. gcc/testsuite/ * c-c++-common/pr111309-3.c: New test. * gcc.dg/torture/bitint-43.c: Add tests with 0ULL second argument. --- gcc/builtins.cc.jj 2023-11-14 10:52:16.170276318 +0100 +++ gcc/builtins.cc 2023-11-18 13:55:02.996395917 +0100 @@ -9591,6 +9591,10 @@ fold_builtin_bit_query (location_t loc, case BUILT_IN_CLZG: if (arg1 && TREE_CODE (arg1) != INTEGER_CST) return NULL_TREE; + if (arg1 + && (TYPE_MAIN_VARIANT (TREE_TYPE (arg1)) + == long_long_unsigned_type_node)) + arg1 = build_int_cst (integer_type_node, TYPE_PRECISION (arg0_type)); ifn = IFN_CLZ; fcodei = BUILT_IN_CLZ; fcodel = BUILT_IN_CLZL; @@ -9599,6 +9603,10 @@ fold_builtin_bit_query (location_t loc, case BUILT_IN_CTZG: if (arg1 && TREE_CODE (arg1) != INTEGER_CST) return NULL_TREE; + if (arg1 + && (TYPE_MAIN_VARIANT (TREE_TYPE (arg1)) + == long_long_unsigned_type_node)) + arg1 = build_int_cst (integer_type_node, TYPE_PRECISION (arg0_type)); ifn = IFN_CTZ; fcodei = BUILT_IN_CTZ; fcodel = BUILT_IN_CTZL; --- gcc/fold-const-call.cc.jj 2023-11-14 10:52:16.186276097 +0100 +++ gcc/fold-const-call.cc 2023-11-18 13:49:57.514641417 +0100 @@ -1543,13 +1543,13 @@ fold_const_call_sss (real_value *result, *RESULT = FN (ARG0, ARG1) - where ARG_TYPE is the type of ARG0 and PRECISION is the number of bits in - the result. Return true on success. */ + where ARG0_TYPE is the type of ARG0, ARG1_TYPE is the type of ARG1 and + PRECISION is the number of bits in the result. Return true on success. */ static bool fold_const_call_sss (wide_int *result, combined_fn fn, const wide_int_ref &arg0, const wide_int_ref &arg1, - unsigned int precision, tree arg_type ATTRIBUTE_UNUSED) + unsigned int precision, tree arg0_type, tree arg1_type) { switch (fn) { @@ -1559,6 +1559,8 @@ fold_const_call_sss (wide_int *result, c int tmp; if (wi::ne_p (arg0, 0)) tmp = wi::clz (arg0); + else if (TYPE_MAIN_VARIANT (arg1_type) == long_long_unsigned_type_node) + tmp = TYPE_PRECISION (arg0_type); else tmp = arg1.to_shwi (); *result = wi::shwi (tmp, precision); @@ -1571,6 +1573,8 @@ fold_const_call_sss (wide_int *result, c int tmp; if (wi::ne_p (arg0, 0)) tmp = wi::ctz (arg0); + else if (TYPE_MAIN_VARIANT (arg1_type) == long_long_unsigned_type_node) + tmp = TYPE_PRECISION (arg0_type); else tmp = arg1.to_shwi (); *result = wi::shwi (tmp, precision); @@ -1625,7 +1629,7 @@ fold_const_call_1 (combined_fn fn, tree wide_int result; if (fold_const_call_sss (&result, fn, wi::to_wide (arg0), wi::to_wide (arg1), TYPE_PRECISION (type), - TREE_TYPE (arg0))) + TREE_TYPE (arg0), TREE_TYPE (arg1))) return wide_int_to_tree (type, result); } return NULL_TREE; --- gcc/doc/extend.texi.jj 2023-11-16 17:27:39.838028110 +0100 +++ gcc/doc/extend.texi 2023-11-18 13:17:40.982551766 +0100 @@ -15031,6 +15031,9 @@ optional second argument with int type. are performed on the first argument. If two arguments are specified, and first argument is 0, the result is the second argument. If only one argument is specified and it is 0, the result is undefined. +As an exception, if two arguments are specified and the second argument +is 0ULL, it is as if the second argument was the bit width of the first +argument. @enddefbuiltin @defbuiltin{int __builtin_ctzg (...)} @@ -15040,6 +15043,9 @@ optional second argument with int type. are performed on the first argument. If two arguments are specified, and first argument is 0, the result is the second argument. If only one argument is specified and it is 0, the result is undefined. +As an exception, if two arguments are specified and the second argument +is 0ULL, it is as if the second argument was the bit width of the first +argument. @enddefbuiltin @defbuiltin{int __builtin_clrsbg (...)} --- gcc/c-family/c-common.cc.jj 2023-11-14 18:26:05.193616416 +0100 +++ gcc/c-family/c-common.cc 2023-11-18 13:20:55.751844490 +0100 @@ -6540,6 +6540,13 @@ check_builtin_function_arguments (locati "%qE does not have integral type", 2, fndecl); return false; } + if (integer_zerop (args[1]) + && (TYPE_MAIN_VARIANT (TREE_TYPE (args[1])) + == long_long_unsigned_type_node)) + args[1] = build_int_cst (integer_type_node, + INTEGRAL_TYPE_P (TREE_TYPE (args[0])) + ? TYPE_PRECISION (TREE_TYPE (args[0])) + : 0); if ((TYPE_PRECISION (TREE_TYPE (args[1])) > TYPE_PRECISION (integer_type_node)) || (TYPE_PRECISION (TREE_TYPE (args[1])) --- gcc/testsuite/c-c++-common/pr111309-3.c.jj 2023-11-18 13:22:22.084644472 +0100 +++ gcc/testsuite/c-c++-common/pr111309-3.c 2023-11-18 13:26:12.894436233 +0100 @@ -0,0 +1,26 @@ +/* PR c/111309 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +int +main () +{ + if (__builtin_clzg ((unsigned char) 0, 0ULL) != __CHAR_BIT__ + || __builtin_clzg ((unsigned short) 0, 0ULL) != __SIZEOF_SHORT__ * __CHAR_BIT__ + || __builtin_clzg (0U, 0ULL) != __SIZEOF_INT__ * __CHAR_BIT__ + || __builtin_clzg (0UL, 0ULL) != __SIZEOF_LONG__ * __CHAR_BIT__ + || __builtin_clzg (0ULL, 0ULL) != __SIZEOF_LONG_LONG__ * __CHAR_BIT__ +#ifdef __SIZEOF_INT128__ + || __builtin_clzg ((unsigned __int128) 0, 0ULL) != __SIZEOF_INT128__ * __CHAR_BIT__ +#endif + || __builtin_clzg ((unsigned char) 1, 0ULL) != __CHAR_BIT__ - 1 + || __builtin_clzg ((unsigned short) 2, 0ULL) != __SIZEOF_SHORT__ * __CHAR_BIT__ - 2 + || __builtin_clzg (4U, 0ULL) != __SIZEOF_INT__ * __CHAR_BIT__ - 3 + || __builtin_clzg (8UL, 0ULL) != __SIZEOF_LONG__ * __CHAR_BIT__ - 4 + || __builtin_clzg (16ULL, 0ULL) != __SIZEOF_LONG_LONG__ * __CHAR_BIT__ - 5 +#ifdef __SIZEOF_INT128__ + || __builtin_clzg ((unsigned __int128) 32, 0ULL) != __SIZEOF_INT128__ * __CHAR_BIT__ - 6 +#endif + || 0) + __builtin_abort (); +} --- gcc/testsuite/gcc.dg/torture/bitint-43.c.jj 2023-11-14 10:52:16.191276028 +0100 +++ gcc/testsuite/gcc.dg/torture/bitint-43.c 2023-11-18 13:28:49.335261722 +0100 @@ -141,7 +141,9 @@ main () || parity156 (0) != 0 || popcount156 (0) != 0 || __builtin_clzg ((unsigned _BitInt(156)) 0, 156 + 32) != 156 + 32 + || __builtin_clzg ((unsigned _BitInt(156)) 0, 0ULL) != 156 || __builtin_ctzg ((unsigned _BitInt(156)) 0, 156) != 156 + || __builtin_ctzg ((unsigned _BitInt(156)) 0, 0ULL) != 156 || __builtin_clrsbg ((_BitInt(156)) 0) != 156 - 1 || __builtin_ffsg ((_BitInt(156)) 0) != 0 || __builtin_parityg ((unsigned _BitInt(156)) 0) != 0 @@ -159,8 +161,10 @@ main () || popcount156 (-1) != 156 || __builtin_clzg ((unsigned _BitInt(156)) -1) != 0 || __builtin_clzg ((unsigned _BitInt(156)) -1, 156 + 32) != 0 + || __builtin_clzg ((unsigned _BitInt(156)) -1, 0ULL) != 0 || __builtin_ctzg ((unsigned _BitInt(156)) -1) != 0 || __builtin_ctzg ((unsigned _BitInt(156)) -1, 156) != 0 + || __builtin_ctzg ((unsigned _BitInt(156)) -1, 0ULL) != 0 || __builtin_clrsbg ((_BitInt(156)) -1) != 156 - 1 || __builtin_ffsg ((_BitInt(156)) -1) != 1 || __builtin_parityg ((unsigned _BitInt(156)) -1) != 0 Jakub