Hi All, On our private port of GCC 4.4.1, we fail to combine successive SHIFT operations, as in the following case:
#include <stdlib.h> #include <stdio.h> void f1 () { unsigned short t1; unsigned short t2; t1 = rand(); t2 = rand(); t1 <<= 1; t2 <<= 1; t1 <<= 1; t2 <<= 1; t1 <<= 1; t2 <<= 1; t1 <<= 1; t2 <<= 1; t1 <<= 1; t2 <<= 1; t1 <<= 1; t2 <<= 1; printf("%d\n", (t1+t2)); } This is a ZERO_EXTEND problem, because combining SHIFTs works correctly with whole integers, and also with signed values. The problem seems to arise in the RTL combiner, which combines the ZERO_EXTEND with the SHIFT to generate a SHIFT and an AND. Our architecture does not support AND with large constants, and hence we do not have a matching insn pattern (we prefer not to do this, because large constants remain hanging at the end of all RTL optimisations and cause needless reloads). Fixing the combiner to convert masking AND operations to ZERO_EXTRACT fixes this issue without any obvious regressions. I'm adding the patch here against GCC 4.4.1 for any comments and/or suggestions. Cheers, Rahul --- combine.c 2009-04-01 21:47:37.000000000 +0100 +++ combine.c 2010-02-04 15:04:41.000000000 +0000 @@ -446,6 +446,7 @@ static void record_truncated_values (rtx *, void *); static bool reg_truncated_to_mode (enum machine_mode, const_rtx); static rtx gen_lowpart_or_truncate (enum machine_mode, rtx); +static bool can_zero_extract_p (rtx, rtx, enum machine_mode); /* It is not safe to use ordinary gen_lowpart in combine. 
@@ -6973,6 +6974,16 @@ make_compound_operation (XEXP (x, 0), next_code), i, NULL_RTX, 1, 1, 0, 1); + else if (can_zero_extract_p (XEXP (x, 0), XEXP (x, 1), mode)) + { + unsigned HOST_WIDE_INT len = HOST_BITS_PER_WIDE_INT + - CLZ_HWI (UINTVAL (XEXP (x, 1))); + new_rtx = make_extraction (mode, + make_compound_operation (XEXP (x, 0), + next_code), + 0, NULL_RTX, len, 1, 0, + in_code == COMPARE); + } break; @@ -7245,6 +7256,25 @@ return simplify_gen_unary (TRUNCATE, mode, x, GET_MODE (x)); } +static bool +can_zero_extract_p (rtx x, rtx mask_rtx, enum machine_mode mode) +{ + unsigned HOST_WIDE_INT count_lz, count_tz; + unsigned HOST_WIDE_INT nonzero, mask_all; + unsigned HOST_WIDE_INT mask_value = UINTVAL (mask_rtx); + + mask_all = (unsigned HOST_WIDE_INT) -1; + nonzero = nonzero_bits (x, mode); + count_lz = CLZ_HWI (mask_value); + count_tz = CTZ_HWI (mask_value); + + if (count_tz <= (unsigned HOST_WIDE_INT) CTZ_HWI (nonzero) + && ((mask_all >> (count_lz + count_tz)) << count_tz) == mask_value) + return true; + + return false; +} + /* See if X can be simplified knowing that we will only refer to it in MODE and will only refer to those bits that are nonzero in MASK. If other bits are being computed or if masking operations are done @@ -8957,7 +8987,6 @@ op0 = UNKNOWN; *pop0 = op0; - /* ??? Slightly redundant with the above mask, but not entirely. Moving this above means we'd have to sign-extend the mode mask for the final test. */