https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119186
Bug ID: 119186
Summary: Using __builtin_ctz results in a error result.
Product: gcc
Version: 12.3.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: drfeng08 at gmail dot com
Target Milestone: ---
> g++ --version
g++ (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0
Copyright (C) 2022 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
> cat reproduce.cpp
#include <immintrin.h>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
inline int sse_memcmp2(const char* p1, const char* p2, int size) {
__m128i left = _mm_lddqu_si128((__m128i*)(p1));
__m128i right = _mm_lddqu_si128((__m128i*)(p2));
__m128i nz = ~_mm_cmpeq_epi8(left, right);
unsigned short mask = _mm_movemask_epi8(nz);
int index = __builtin_ctz(mask);
asm volatile("" : : : "memory");
std::cout << index << ":" << size << std::endl;
if (index >= size) return 0;
int l = (uint8_t)p1[index];
int r = (uint8_t)p2[index];
return l - r;
}
int main() {
const char c1[32] = "0123456789abcdef";
const char c2[32] = "0123456789abcdef";
size_t length = 16;
int res = memcmp(c1, c2, length);
int res2 = sse_memcmp2(c1, c2, length);
if (res != res2) {
std::abort();
}
std::cout << "finished" << std::endl;
return 0;
}
> g++ reproduce.cpp -msse4.2 -O2 -g -fno-strict-aliasing
32:16
Aborted (core dumped)
It works in gcc 11.4.0 or -O0 in 12.3.0v