On Tue, Jul 30, 2024 at 12:39 PM Thomas Munro <[email protected]> wrote:
> On Tue, Jul 30, 2024 at 11:16 AM Heikki Linnakangas <[email protected]> wrote:
> > I think we should do:
> >
> > #ifdef _M_AMD64
> > #define __x86_64__
> > #endif
> >
> > somewhere, perhaps in src/include/port/win32.h.
I suppose we could define our own
PG_ARCH_{ARM,MIPS,POWER,RISCV,S390,SPARC,X86}_{32,64} in one central
place, instead. Draft patch for illustration.
From 4df24c6fe7370cdeacd4e794f4ccc6202a909e62 Mon Sep 17 00:00:00 2001
From: Thomas Munro <[email protected]>
Date: Thu, 1 Aug 2024 16:38:05 +1200
Subject: [PATCH] Standardize macros for detecting architectures.
Instead of repeating multiple compilers' architecture macros throughout
the tree, detect them in one central place, and define our own macros of
the form:
PG_ARCH_{ARM,MIPS,POWER,RISCV,S390,SPARC,X86}_{32,64}
This fixes the problem that MSVC builds were unintentionally using
suboptimal fallback code defined by "port/atomics.h", due to
inconsistent testing for macros. A couple of other places were also
affected.
XXX This patch doesn't adjust s_lock.h, because it's complicated, full
of old dead sub-architectures, and a nearby patch proposes to delete
it...
Discussion: https://postgr.es/m/CA%2BhUKGKAf_i6w7hB_3pqZXQeqn%2BixvY%2BCMps_n%3DmJ5HAatMjMw%40mail.gmail.com
---
contrib/pgcrypto/crypt-blowfish.c | 4 ++--
src/include/c.h | 33 +++++++++++++++++++++++++++++
src/include/port/atomics.h | 6 +++---
src/include/port/atomics/arch-x86.h | 16 +++++++-------
src/include/port/pg_bitutils.h | 6 +++---
src/include/port/simd.h | 2 +-
src/include/storage/s_lock.h | 2 +-
src/port/pg_crc32c_sse42.c | 4 ++--
8 files changed, 53 insertions(+), 20 deletions(-)
diff --git a/contrib/pgcrypto/crypt-blowfish.c b/contrib/pgcrypto/crypt-blowfish.c
index 5a1b1e10091..9c4e02e428b 100644
--- a/contrib/pgcrypto/crypt-blowfish.c
+++ b/contrib/pgcrypto/crypt-blowfish.c
@@ -38,10 +38,10 @@
#include "px-crypt.h"
#include "px.h"
-#ifdef __i386__
+#if defined(PG_ARCH_X86_32)
#define BF_ASM 0 /* 1 */
#define BF_SCALE 1
-#elif defined(__x86_64__)
+#elif defined(PG_ARCH_X86_64)
#define BF_ASM 0
#define BF_SCALE 1
#else
diff --git a/src/include/c.h b/src/include/c.h
index dc1841346cd..542cbd33fad 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -425,6 +425,39 @@ typedef void (*pg_funcptr_t) (void);
#define HAVE_PRAGMA_GCC_SYSTEM_HEADER 1
#endif
+/*
+ * Project-standardized name for CPU architectures, to avoid having to repeat
+ * the names that different compilers use.
+ */
+#if defined(__arm__) || defined(__arm)
+#define PG_ARCH_ARM_32
+#elif defined(__aarch64__) || defined(_M_ARM64)
+#define PG_ARCH_ARM_64
+#elif defined(__mips__)
+#define PG_ARCH_MIPS_32
+#elif defined(__mips64__)
+#define PG_ARCH_MIPS_64
+#elif defined(__ppc__) || defined(__powerpc__)
+#define PG_ARCH_POWER_32
+#elif defined(__ppc64__) || defined(__powerpc64__)
+#define PG_ARCH_POWER_64
+#elif defined(__riscv__)
+#define PG_ARCH_RISCV_32
+#elif defined(__riscv64__)
+#define PG_ARCH_RISCV_64
+#elif defined(__s390__)
+#define PG_ARCH_S390_32
+#elif defined(__s390x__)
+#define PG_ARCH_S390_64
+#elif defined(__sparc)
+#define PG_ARCH_SPARC_32
+#elif defined(__sparcv9)
+#define PG_ARCH_SPARC_64
+#elif defined(__i386__) || defined (__i386) || defined(_M_IX86)
+#define PG_ARCH_X86_32
+#elif defined(__x86_64__) || defined(__x86_64) || defined (__amd64)
+#define PG_ARCH_X86_64
+#endif
/* ----------------------------------------------------------------
* Section 2: bool, true, false
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index c0c8688f736..3300ea54c17 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -63,11 +63,11 @@
* compiler barrier.
*
*/
-#if defined(__arm__) || defined(__arm) || defined(__aarch64__)
+#if defined(PG_ARCH_ARM_32) || defined(PG_ARCH_ARM_64)
#include "port/atomics/arch-arm.h"
-#elif defined(__i386__) || defined(__i386) || defined(__x86_64__)
+#elif defined(PG_ARCH_X86_32) || defined(PG_ARCH_X86_64)
#include "port/atomics/arch-x86.h"
-#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
+#elif defined(PG_ARCH_POWER_32) || defined (PG_ARCH_POWER_64)
#include "port/atomics/arch-ppc.h"
#endif
diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h
index c12f8a60697..9f20edf221f 100644
--- a/src/include/port/atomics/arch-x86.h
+++ b/src/include/port/atomics/arch-x86.h
@@ -32,10 +32,10 @@
*/
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
-#if defined(__i386__) || defined(__i386)
+#if defined(PG_ARCH_X86_32)
#define pg_memory_barrier_impl() \
__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc")
-#elif defined(__x86_64__)
+#elif defined(PG_ARCH_X86_64)
#define pg_memory_barrier_impl() \
__asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc")
#endif
@@ -67,14 +67,14 @@ typedef struct pg_atomic_uint32
* It's too complicated to write inline asm for 64bit types on 32bit and the
* 486 can't do it anyway.
*/
-#ifdef __x86_64__
+#ifdef PG_ARCH_X86_64
#define PG_HAVE_ATOMIC_U64_SUPPORT
typedef struct pg_atomic_uint64
{
/* alignment guaranteed due to being on a 64bit platform */
volatile uint64 value;
} pg_atomic_uint64;
-#endif /* __x86_64__ */
+#endif /* PG_ARCH_X86_64 */
#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
@@ -109,7 +109,7 @@ pg_spin_delay_impl(void)
{
__asm__ __volatile__(" rep; nop \n");
}
-#elif defined(_MSC_VER) && defined(__x86_64__)
+#elif defined(_MSC_VER) && defined(PG_ARCH_X86_64)
#define PG_HAVE_SPIN_DELAY
static __forceinline void
pg_spin_delay_impl(void)
@@ -192,7 +192,7 @@ pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
return res;
}
-#ifdef __x86_64__
+#ifdef PG_ARCH_X86_64
#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
static inline bool
@@ -231,7 +231,7 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
return res;
}
-#endif /* __x86_64__ */
+#endif /* PG_ARCH_X86_64 */
#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
@@ -241,6 +241,6 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
*/
#if defined(__i568__) || defined(__i668__) || /* gcc i586+ */ \
(defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \
- defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */
+ defined(PG_ARCH_X86_64)
#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
#endif /* 8 byte single-copy atomicity */
diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 4d88478c9c2..39a756b7638 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -82,7 +82,7 @@ pg_leftmost_one_pos64(uint64 word)
#error must have a working 64-bit integer datatype
#endif /* HAVE_LONG_INT_64 */
-#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && (defined(PG_ARCH_ARM_64) || defined(PG_ARCH_X86_64))
unsigned long result;
bool non_zero;
@@ -155,7 +155,7 @@ pg_rightmost_one_pos64(uint64 word)
#error must have a working 64-bit integer datatype
#endif /* HAVE_LONG_INT_64 */
-#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && (defined(PG_ARCH_ARM_64) || defined(PG_ARCH_X86_64))
unsigned long result;
bool non_zero;
@@ -282,7 +282,7 @@ pg_ceil_log2_64(uint64 num)
* __builtin_popcount* intrinsic functions as they always emit popcnt
* instructions.
*/
-#if defined(_MSC_VER) && defined(_M_AMD64)
+#if defined(_MSC_VER) && defined(PG_ARCH_X86_64)
#define HAVE_X86_64_POPCNTQ
#endif
diff --git a/src/include/port/simd.h b/src/include/port/simd.h
index 597496f2fb7..8c3555707e1 100644
--- a/src/include/port/simd.h
+++ b/src/include/port/simd.h
@@ -18,7 +18,7 @@
#ifndef SIMD_H
#define SIMD_H
-#if (defined(__x86_64__) || defined(_M_AMD64))
+#ifdef PG_ARCH_X86_64
/*
* SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume
* that compilers targeting this architecture understand SSE2 intrinsics.
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index e94ed5f48bd..3b9749e6a4b 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -385,7 +385,7 @@ do \
/* PowerPC */
-#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
+#if defined(PG_ARCH_POWER_32) || defined(PG_ARCH_POWER_64)
#define HAS_TEST_AND_SET
typedef unsigned int slock_t;
diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c
index 7f88c114800..30c008ce4f4 100644
--- a/src/port/pg_crc32c_sse42.c
+++ b/src/port/pg_crc32c_sse42.c
@@ -32,7 +32,7 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
* and performance testing didn't show any performance gain from aligning
* the begin address.
*/
-#ifdef __x86_64__
+#ifdef PG_ARCH_X86_64
while (p + 8 <= pend)
{
crc = (uint32) _mm_crc32_u64(crc, *((const uint64 *) p));
@@ -56,7 +56,7 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
crc = _mm_crc32_u32(crc, *((const unsigned int *) p));
p += 4;
}
-#endif /* __x86_64__ */
+#endif /* PG_ARCH_X86_64 */
/* Process any remaining bytes one at a time. */
while (p < pend)
--
2.45.2