https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117323
Jakub Jelinek <jakub at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |jakub at gcc dot gnu.org
--- Comment #3 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
With -O2, I see >> 7 and no / 128 in both cases on the following testcase:
// Returns a reference to the smaller of a and b, compared with operator<.
// When neither argument is less than the other, a is returned.
template<typename T>
constexpr inline const T &
min (const T &a, const T &b)
{
if (b < a)
return b;
return a;
}
// Returns a reference to the larger of a and b, compared with operator<.
// When neither argument is less than the other, a is returned.
template<typename T>
constexpr inline const T &
max (const T &a, const T &b)
{
if (a < b)
return b;
return a;
}
// For each i from 0 until i == n: clamps p1[i] and p2[i] to [0, 127],
// multiplies the clamped values, divides the product by 128 and stores the
// result, converted to char, in p3[i].
// This variant passes the constant 0 as the FIRST argument of max<int>;
// compare with bar, which passes it as the second.
void
foo (short *p1, short *p2, char *__restrict p3, int n)
{
for (int i = 0; i != n; i++)
{
short sum0 = p1[i];
short sum1 = p2[i];
// The explicit <int> makes both min and max operate on int values; the
// clamped result is then assigned back to the short variable.
sum0 = max<int> (0, min<int> (127, sum0));
sum1 = max<int> (0, min<int> (127, sum1));
// Both operands are in [0, 127] here, so the product is non-negative
// (at most 127 * 127) and the division by 128 is equivalent to >> 7.
p3[i] = static_cast<char> (sum0 * sum1 / 128);
}
}
// For each i from 0 until i == n: clamps p1[i] and p2[i] to [0, 127],
// multiplies the clamped values, divides the product by 128 and stores the
// result, converted to char, in p3[i].
// This variant passes the constant 0 as the SECOND argument of max<int>;
// it is otherwise identical to foo, which passes it as the first.
void
bar (short *p1, short *p2, char *__restrict p3, int n)
{
for (int i = 0; i != n; i++)
{
short sum0 = p1[i];
short sum1 = p2[i];
// The explicit <int> makes both min and max operate on int values; the
// clamped result is then assigned back to the short variable.
sum0 = max<int> (min<int> (127, sum0), 0);
sum1 = max<int> (min<int> (127, sum1), 0);
// Both operands are in [0, 127] here, so the product is non-negative
// (at most 127 * 127) and the division by 128 is equivalent to >> 7.
p3[i] = static_cast<char> (sum0 * sum1 / 128);
}
}
That is the case at least starting with the vrp2 pass.
Or do you mean that on x86_64 evrp/vrp1 can already do that, but only in bar
and not in foo?