Hi, I'm a bit confused by this focus on bitfields - both Alexander and Konstantin stated they could reproduce the issue without the bitfields.
But we have observed the generated code being pretty grotty and it's caused more than enough confusion - so let's just replace them with plain uint8's and cast in switches. > I think the issue is that if the compiler decides to coalesce what we > think of as distinct (but neighboring) bitfields, then when you update > one of the bitfields you could be updating the other with stale data > from an earlier read where the cached stale data is cached in a > _register_. Thus the fact that the cache line should have the most up > to date data for that other field is irrelevant because the stale data > is in a _register_. > > The very fact that this can happen, that the C specs allow it, argues > that one must never have adjacent distinct (for some value of > "distinct") bitfields for anything that requires atomics. I think the barriers in place should prevent that. Greetings, Andres Freund
>From 3839e3a0a0e6717dcf6ee3d0547b3268c4a3fa3a Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Mon, 25 Aug 2025 20:16:33 -0400 Subject: [PATCH v1] aio: Stop using enum bitfields due to bad code generation During an investigation into rather odd aio related errors on macos, observed by Alexander and Konstantin, we started to wonder if bitfield access is related to the error. As it turns out, no. But during that investigation we noticed that at least gcc and clang generate rather terrible code for the bitfield access. Therefore, replace the enum bitfields with uint8s and instead cast in each switch statement. Reported-by: Alexander Lakhin <exclus...@gmail.com> Reported-by: Konstantin Knizhnik <knizh...@garret.ru> Discussion: https://postgr.es/m/1500090.1745443...@sss.pgh.pa.us Backpatch-through: 18 --- src/include/storage/aio_internal.h | 14 ++++++++++---- src/backend/storage/aio/aio.c | 10 +++++----- src/backend/storage/aio/aio_funcs.c | 2 +- src/backend/storage/aio/aio_io.c | 8 ++++---- src/backend/storage/aio/method_io_uring.c | 2 +- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/include/storage/aio_internal.h b/src/include/storage/aio_internal.h index 2d37a243abe..b4de30f2ec1 100644 --- a/src/include/storage/aio_internal.h +++ b/src/include/storage/aio_internal.h @@ -92,17 +92,23 @@ typedef enum PgAioHandleState struct ResourceOwnerData; -/* typedef is in aio_types.h */ +/* + * Typedef is in aio_types.h + * + * We don't use the underlying enums for state, target and op to avoid wasting + * space. We tried using bitfields, but several compilers generate rather + * horrid code for that. + */ struct PgAioHandle { /* all state updates should go through pgaio_io_update_state() */ - PgAioHandleState state:8; + uint8 state; /* what are we operating on */ - PgAioTargetID target:8; + uint8 target; /* which IO operation */ - PgAioOp op:8; + uint8 op; /* bitfield of PgAioHandleFlags */ uint8 flags; diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c index 3643f27ad6e..87d7136a936 100644 --- a/src/backend/storage/aio/aio.c +++ b/src/backend/storage/aio/aio.c @@ -275,7 +275,7 @@ pgaio_io_release_resowner(dlist_node *ioh_node, bool on_error) ResourceOwnerForgetAioHandle(ioh->resowner, &ioh->resowner_node); ioh->resowner = NULL; - switch (ioh->state) + switch ((PgAioHandleState) ioh->state) { case PGAIO_HS_IDLE: elog(ERROR, "unexpected"); @@ -600,7 +600,7 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation) if (pgaio_io_was_recycled(ioh, ref_generation, &state)) return; - switch (state) + switch ((PgAioHandleState) state) { case PGAIO_HS_IDLE: case PGAIO_HS_HANDED_OUT: @@ -825,7 +825,7 @@ pgaio_io_wait_for_free(void) &pgaio_my_backend->in_flight_ios); uint64 generation = ioh->generation; - switch (ioh->state) + switch ((PgAioHandleState) ioh->state) { /* should not be in in-flight list */ case PGAIO_HS_IDLE: @@ -905,7 +905,7 @@ static const char * pgaio_io_state_get_name(PgAioHandleState s) { #define PGAIO_HS_TOSTR_CASE(sym) case PGAIO_HS_##sym: return #sym - switch (s) + switch ((PgAioHandleState) s) { PGAIO_HS_TOSTR_CASE(IDLE); PGAIO_HS_TOSTR_CASE(HANDED_OUT); @@ -930,7 +930,7 @@ pgaio_io_get_state_name(PgAioHandle *ioh) const char * pgaio_result_status_string(PgAioResultStatus rs) { - switch (rs) + switch ((PgAioResultStatus) rs) { case PGAIO_RS_UNKNOWN: return "UNKNOWN"; diff --git a/src/backend/storage/aio/aio_funcs.c b/src/backend/storage/aio/aio_funcs.c index 34f8f632733..d7977387b8f 100644 --- a/src/backend/storage/aio/aio_funcs.c +++ b/src/backend/storage/aio/aio_funcs.c @@ -175,7 +175,7 @@ retry: values[4] = CStringGetTextDatum(pgaio_io_get_op_name(&ioh_copy)); /* columns: details about the IO's operation (offset, length) */ - switch (ioh_copy.op) + switch ((PgAioOp) ioh_copy.op) { case PGAIO_OP_INVALID: nulls[5] = true; diff --git a/src/backend/storage/aio/aio_io.c b/src/backend/storage/aio/aio_io.c index 520b5077df2..7d11d40284a 100644 --- a/src/backend/storage/aio/aio_io.c +++ b/src/backend/storage/aio/aio_io.c @@ -121,7 +121,7 @@ pgaio_io_perform_synchronously(PgAioHandle *ioh) START_CRIT_SECTION(); /* Perform IO. */ - switch (ioh->op) + switch ((PgAioOp) ioh->op) { case PGAIO_OP_READV: pgstat_report_wait_start(WAIT_EVENT_DATA_FILE_READ); @@ -176,7 +176,7 @@ pgaio_io_get_op_name(PgAioHandle *ioh) { Assert(ioh->op >= 0 && ioh->op < PGAIO_OP_COUNT); - switch (ioh->op) + switch ((PgAioOp) ioh->op) { case PGAIO_OP_INVALID: return "invalid"; @@ -198,7 +198,7 @@ pgaio_io_uses_fd(PgAioHandle *ioh, int fd) { Assert(ioh->state >= PGAIO_HS_DEFINED); - switch (ioh->op) + switch ((PgAioOp) ioh->op) { case PGAIO_OP_READV: return ioh->op_data.read.fd == fd; @@ -222,7 +222,7 @@ pgaio_io_get_iovec_length(PgAioHandle *ioh, struct iovec **iov) *iov = &pgaio_ctl->iovecs[ioh->iovec_off]; - switch (ioh->op) + switch ((PgAioOp) ioh->op) { case PGAIO_OP_READV: return ioh->op_data.read.iov_length; diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c index 0a8c054162f..d4b7c7bc05c 100644 --- a/src/backend/storage/aio/method_io_uring.c +++ b/src/backend/storage/aio/method_io_uring.c @@ -660,7 +660,7 @@ pgaio_uring_sq_from_io(PgAioHandle *ioh, struct io_uring_sqe *sqe) { struct iovec *iov; - switch (ioh->op) + switch ((PgAioOp) ioh->op) { case PGAIO_OP_READV: iov = &pgaio_ctl->iovecs[ioh->iovec_off]; -- 2.48.1.76.g4e746b1a31.dirty