Re: standby promotion can create unreadable WAL
On Fri, Aug 26, 2022 at 7:53 PM Robert Haas wrote: > > On Fri, Aug 26, 2022 at 10:06 AM Alvaro Herrera > wrote: > > There's a small typo in the comment: "When find that". I suppose that > > was meant to be "When we find that". You end that para with "and thus > > we should not do this", but that sounds like it wouldn't matter if we > > did. Maybe "and thus doing this would be wrong, so skip it." or > > something like that. (Perhaps be even more specific and say "if we did > > this, we would later create an overwrite record in the wrong place, > > breaking everything") > > I think that saying that someone should not do something implies > pretty clearly that it would be bad if they did. But I have no problem > with your more specific language, and as a general rule, it's good to > be specific, so let's use that. > > v2 attached. The patch LGTM, this patch will apply on master and v15. PFA patch for back branches. -- Regards, Dilip Kumar EnterpriseDB: http://www.enterprisedb.com fix-contrecord-condition-v2_v14_v96.patch Description: Binary data
Re: use ARM intrinsics in pg_lfind32() where available
Here is a new patch set in which I've attempted to address all feedback. -- Nathan Bossart Amazon Web Services: https://aws.amazon.com >From a5f381097819db05b6e47418597cd56bab411fad Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Thu, 25 Aug 2022 22:18:30 -0700 Subject: [PATCH v5 1/2] abstract architecture-specific implementation details from pg_lfind32() --- src/include/port/pg_lfind.h | 55 +++-- src/include/port/simd.h | 96 +++-- 2 files changed, 112 insertions(+), 39 deletions(-) diff --git a/src/include/port/pg_lfind.h b/src/include/port/pg_lfind.h index a4e13dffec..2f8413b59e 100644 --- a/src/include/port/pg_lfind.h +++ b/src/include/port/pg_lfind.h @@ -91,16 +91,19 @@ pg_lfind32(uint32 key, uint32 *base, uint32 nelem) { uint32 i = 0; -#ifdef USE_SSE2 +#ifndef USE_NO_SIMD /* - * A 16-byte register only has four 4-byte lanes. For better - * instruction-level parallelism, each loop iteration operates on a block - * of four registers. Testing has showed this is ~40% faster than using a - * block of two registers. + * For better instruction-level parallelism, each loop iteration operates + * on a block of four registers. Testing for SSE2 has showed this is ~40% + * faster than using a block of two registers. 
*/ - const __m128i keys = _mm_set1_epi32(key); /* load 4 copies of key */ - uint32 iterations = nelem & ~0xF; /* round down to multiple of 16 */ + const Vector32 keys = vector32_broadcast(key); /* load copies of key */ + uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32); + uint32 nelem_per_iteration = 4 * nelem_per_vector; + + /* round down to multiple of elements per iteration */ + uint32 tail_idx = nelem & ~(nelem_per_iteration - 1); #if defined(USE_ASSERT_CHECKING) bool assert_result = false; @@ -116,31 +119,33 @@ pg_lfind32(uint32 key, uint32 *base, uint32 nelem) } #endif - for (i = 0; i < iterations; i += 16) + for (i = 0; i < tail_idx; i += nelem_per_iteration) { - /* load the next block into 4 registers holding 4 values each */ - const __m128i vals1 = _mm_loadu_si128((__m128i *) & base[i]); - const __m128i vals2 = _mm_loadu_si128((__m128i *) & base[i + 4]); - const __m128i vals3 = _mm_loadu_si128((__m128i *) & base[i + 8]); - const __m128i vals4 = _mm_loadu_si128((__m128i *) & base[i + 12]); + Vector32 vals1, vals2, vals3, vals4, + result1, result2, result3, result4, + tmp1, tmp2, result; + + /* load the next block into 4 registers */ + vector32_load(&vals1, &base[i]); + vector32_load(&vals2, &base[i + nelem_per_vector]); + vector32_load(&vals3, &base[i + nelem_per_vector * 2]); + vector32_load(&vals4, &base[i + nelem_per_vector * 3]); /* compare each value to the key */ - const __m128i result1 = _mm_cmpeq_epi32(keys, vals1); - const __m128i result2 = _mm_cmpeq_epi32(keys, vals2); - const __m128i result3 = _mm_cmpeq_epi32(keys, vals3); - const __m128i result4 = _mm_cmpeq_epi32(keys, vals4); + result1 = vector32_eq(keys, vals1); + result2 = vector32_eq(keys, vals2); + result3 = vector32_eq(keys, vals3); + result4 = vector32_eq(keys, vals4); /* combine the results into a single variable */ - const __m128i tmp1 = _mm_or_si128(result1, result2); - const __m128i tmp2 = _mm_or_si128(result3, result4); - const __m128i result = _mm_or_si128(tmp1, tmp2); + tmp1 = vector32_or(result1, result2); + tmp2 
= vector32_or(result3, result4); + result = vector32_or(tmp1, tmp2); /* see if there was a match */ - if (_mm_movemask_epi8(result) != 0) + if (vector32_is_highbit_set(result)) { -#if defined(USE_ASSERT_CHECKING) Assert(assert_result == true); -#endif return true; } } @@ -151,14 +156,14 @@ pg_lfind32(uint32 key, uint32 *base, uint32 nelem) { if (key == base[i]) { -#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING) +#ifndef USE_NO_SIMD Assert(assert_result == true); #endif return true; } } -#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING) +#ifndef USE_NO_SIMD Assert(assert_result == false); #endif return false; diff --git a/src/include/port/simd.h b/src/include/port/simd.h index a425cd887b..58b5f5ed86 100644 --- a/src/include/port/simd.h +++ b/src/include/port/simd.h @@ -31,39 +31,52 @@ #include #define USE_SSE2 typedef __m128i Vector8; +typedef __m128i Vector32; #else /* * If no SIMD instructions are available, we can in some cases emulate vector - * operations using bitwise operations on unsigned integers. + * operations using bitwise operations on unsigned integers. Note that many + * of the functions in this file presently do not have non-SIMD + * implementations. */ #define USE_NO_SIMD typedef uint64 Vector8; #endif - /* load/store operations */ static inline void vector8_load(Vector8 *v, const uint8 *s); +#ifndef USE_NO_SIMD +static inline void vector32_load(Vector32 *v, const uint32 *s); +#endif /* assignment operations */ static inline Vector8 vector8_broadcast(const uint8 c); +#ifndef
Re: [RFC] building postgres with meson - v12
On Sun, Aug 28, 2022 at 1:39 PM Andres Freund wrote: > On 2022-08-27 18:02:40 -0700, Andres Freund wrote: > > FWIW, I did notice that netbsd does have working unnamed semaphores. I don't > > know how long ago they were added, but they apparently didn't work quite > > right > > in 2018 [1]. No meaningful performance chance in the main regression tests, > > I'll run a concurrent check world comparison in the background... > > Unnamed ones are substantially worse unfortunately. On an 8 core netbsd 9.3 > VM: I could update my experimental patch to add home made semaphores using atomics and futexes. It needs NetBSD 10, though. Also works on OpenBSD and macOS.
Re: [RFC] building postgres with meson - v12
Hi, On 2022-08-27 18:02:40 -0700, Andres Freund wrote: > FWIW, I did notice that netbsd does have working unnamed semaphores. I don't > know how long ago they were added, but they apparently didn't work quite right > in 2018 [1]. No meaningful performance change in the main regression tests, > I'll run a concurrent check world comparison in the background... Unnamed ones are substantially worse unfortunately. On an 8 core netbsd 9.3 VM: sysv: real4m39.777s user7m35.534s sys 7m33.831s unnamed posix real5m44.035s user7m23.326s sys 11m58.946s The difference in system time is even more substantial than the wall clock time. And repeated runs were even worse. I also had the ecpg tests hang in one run with unnamed posix semas, until I killed 'alloc'. Didn't reproduce since though. So clearly we shouldn't go and start auto-detecting unnamed posix sema support. Greetings, Andres Freund
Re: [RFC] building postgres with meson - v12
Hi, On 2022-08-27 11:04:47 -0700, Andres Freund wrote: > - choice of semaphore API needs to be cleaned up, that should be easy now, but > I thought that I needed to get a new version out first Every time I look at the existing selection code I get confused, which is part of why I haven't tackled this yet. If I understand the current logic right, we check for sem_open, sem_init if and only if PREFERRED_SEMAPHORES is set to UNNAMED_POSIX or NAMED_POSIX (no template defaults to named). Which also means that we don't link in rt or pthread if USE_*_POSIX_SEMAPHORES is set directly in the template, as darwin does. I read the configure.ac code combined with the templates as resulting in the following precedence order: 1) windows uses windows semaphores 2) freebsd, linux use unnamed posix semaphores if available 3) macOS < 10.2 uses named semaphores, without linking in rt/pthread 4) macos >= 10.2 uses sysv semaphores 5) sysv semaphores are used Correct? Given that Mac OS 10.2 was released in 2002, I think we can safely consider that unsupported - even prairiedog was a few years younger... :). Given the downsides of named semaphores and that we apparently haven't used that code in years, I wonder if we should remove it? However, there has been a thread about defaulting to named semas on openbsd, but then Tom benchmarked out that that's not going to fly for performance reasons ([1]). FWIW, I did notice that netbsd does have working unnamed semaphores. I don't know how long ago they were added, but they apparently didn't work quite right in 2018 [1]. No meaningful performance change in the main regression tests, I'll run a concurrent check world comparison in the background... Should the choice be configurable with meson? I'm inclined to say not for now. Regards, Andres [1] https://postgr.es/m/3010886.1634950831%40sss.pgh.pa.us [2] http://www.polarhome.com/service/man/?qf=sem_init=2=NetBSD=3
Re: Backends stunk in wait event IPC/MessageQueueInternal
On Sun, Jun 26, 2022 at 11:18 AM Thomas Munro wrote: > On Tue, May 17, 2022 at 3:31 PM Thomas Munro wrote: > > On Mon, May 16, 2022 at 3:45 PM Japin Li wrote: > > > Maybe use the __illumos__ macro more accurity. > > > > > > +#elif defined(WAIT_USE_EPOLL) && defined(HAVE_SYS_SIGNALFD_H) && \ > > > + !defined(__sun__) > > > > Thanks, updated, and with a new commit message. > > Pushed to master and REL_14_STABLE. FTR: I noticed that https://www.illumos.org/issues/13700 had been marked fixed, so I asked if we should remove our check[1]. Nope, another issue was opened at https://www.illumos.org/issues/14892, which I'll keep an eye on. It seems we're pretty good at hitting poll/event-related kernel bugs in various OSes. [1] https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=3ab4fc5dcf30ebc90a23ad878342dc528e2d25ce
Re: use ARM intrinsics in pg_lfind32() where available
On Sun, Aug 28, 2022 at 10:39:09AM +1200, Thomas Munro wrote: > On Sun, Aug 28, 2022 at 10:12 AM Nathan Bossart > wrote: >> Yup. The problem is that AFAICT there's no equivalent to >> _mm_movemask_epi8() on aarch64, so you end up with something like >> >> vmaxvq_u8(vandq_u8(v, vector8_broadcast(0x80))) != 0 >> >> But for pg_lfind32(), we really just want to know if any lane is set, which >> only requires a call to vmaxvq_u32(). I haven't had a chance to look too >> closely, but my guess is that this ultimately results in an extra AND >> operation in the aarch64 path, so maybe it doesn't impact performance too >> much. The other option would be to open-code the intrinsic function calls >> into pg_lfind.h. I'm trying to avoid the latter, but maybe it's the right >> thing to do for now... What do you think? > > Ahh, this gives me a flashback to John's UTF-8 validation thread[1] > (the beginner NEON hackery in there was just a learning exercise, > sadly not followed up with real patches...). He had > _mm_movemask_epi8(v) != 0 which I first translated to > to_bool(bitwise_and(v, vmovq_n_u8(0x80))) and he pointed out that > vmaxvq_u8(v) > 0x7F has the right effect without the and. I knew there had to be an easier way! I'll give this a try. Thanks. -- Nathan Bossart Amazon Web Services: https://aws.amazon.com
Re: use ARM intrinsics in pg_lfind32() where available
On Sun, Aug 28, 2022 at 10:12 AM Nathan Bossart wrote: > Yup. The problem is that AFAICT there's no equivalent to > _mm_movemask_epi8() on aarch64, so you end up with something like > > vmaxvq_u8(vandq_u8(v, vector8_broadcast(0x80))) != 0 > > But for pg_lfind32(), we really just want to know if any lane is set, which > only requires a call to vmaxvq_u32(). I haven't had a chance to look too > closely, but my guess is that this ultimately results in an extra AND > operation in the aarch64 path, so maybe it doesn't impact performance too > much. The other option would be to open-code the intrinsic function calls > into pg_lfind.h. I'm trying to avoid the latter, but maybe it's the right > thing to do for now... What do you think? Ahh, this gives me a flashback to John's UTF-8 validation thread[1] (the beginner NEON hackery in there was just a learning exercise, sadly not followed up with real patches...). He had _mm_movemask_epi8(v) != 0 which I first translated to to_bool(bitwise_and(v, vmovq_n_u8(0x80))) and he pointed out that vmaxvq_u8(v) > 0x7F has the right effect without the and. [1] https://www.postgresql.org/message-id/CA%2BhUKGJjyXvS6W05kRVpH6Kng50%3DuOGxyiyjgPKm707JxQYHCg%40mail.gmail.com
Re: use ARM intrinsics in pg_lfind32() where available
On Sat, Aug 27, 2022 at 05:18:34PM -0400, Tom Lane wrote: > In short, I think the critical part of 0002 needs to look more like > this: > > +#elif defined(__aarch64__) && defined(__ARM_NEON) > +/* > + * We use the Neon instructions if the compiler provides access to them > + * (as indicated by __ARM_NEON) and we are on aarch64. While Neon support is > + * technically optional for aarch64, it appears that all available 64-bit > + * hardware does have it. Neon exists in some 32-bit hardware too, but > + * we could not realistically use it there without a run-time check, > + * which seems not worth the trouble for now. > + */ > +#include > +#define USE_NEON > ... Thank you for the analysis! I'll do it this way in the next patch set. -- Nathan Bossart Amazon Web Services: https://aws.amazon.com
Re: use ARM intrinsics in pg_lfind32() where available
Thanks for taking a look. On Sat, Aug 27, 2022 at 01:59:06PM +0700, John Naylor wrote: > I don't forsee any use of emulating vector registers with uint64 if > they only hold two ints. I wonder if it'd be better if all vector32 > functions were guarded with #ifndef NO_USE_SIMD. (I wonder if > declarations without definitions cause warnings...) Yeah. I was a bit worried about the readability of this file with so many #ifndefs, but after trying it out, I suppose it doesn't look _too_ bad. > + * NB: This function assumes that each lane in the given vector either has > all > + * bits set or all bits zeroed, as it is mainly intended for use with > + * operations that produce such vectors (e.g., vector32_eq()). If this > + * assumption is not true, this function's behavior is undefined. > + */ > > Hmm? Yup. The problem is that AFAICT there's no equivalent to _mm_movemask_epi8() on aarch64, so you end up with something like vmaxvq_u8(vandq_u8(v, vector8_broadcast(0x80))) != 0 But for pg_lfind32(), we really just want to know if any lane is set, which only requires a call to vmaxvq_u32(). I haven't had a chance to look too closely, but my guess is that this ultimately results in an extra AND operation in the aarch64 path, so maybe it doesn't impact performance too much. The other option would be to open-code the intrinsic function calls into pg_lfind.h. I'm trying to avoid the latter, but maybe it's the right thing to do for now... What do you think? > -#elif defined(USE_SSE2) > +#elif defined(USE_SSE2) || defined(USE_NEON) > > I think we can just say #else. Yes. > -#if defined(USE_SSE2) > - __m128i sub; > +#ifndef USE_NO_SIMD > + Vector8 sub; > > +#elif defined(USE_NEON) > + > + /* use the same approach as the USE_SSE2 block above */ > + sub = vqsubq_u8(v, vector8_broadcast(c)); > + result = vector8_has_zero(sub); > > I think we should invent a helper that does saturating subtraction and > call that, inlining the sub var so we don't need to mess with it > further. 
Good idea, will do. -- Nathan Bossart Amazon Web Services: https://aws.amazon.com
Re: use ARM intrinsics in pg_lfind32() where available
I spent a bit more time researching the portability implications of this patch. I think that we should check __ARM_NEON before #including ; there is authoritative documentation out there telling you to, eg [1], and I can see no upside at all to not checking. We cannot check *only* __ARM_NEON, though. I found it to get defined by clang 8.0.0 in my Fedora 30 32-bit image, although that does not provide all the instructions we want (I see "undefined function" complaints for vmaxvq_u8 etc if I try to make it use the patch). Looking into that installation's , those functions are defined conditionally if "__ARM_FP & 2", which is kind of interesting --- per [1], that bit indicates support for 16-bit floating point, which seems a mite unrelated. It appears from the info at [2] that there are at least some 32-bit ARM platforms that set that bit, implying (if the clang authors are well informed) that they have the instructions we want. But we could not realistically make 32-bit builds that try to use those instructions without a run-time test; such a build would fail for too many people. I doubt that a run-time test is worth the trouble, so I concur with the idea of selecting NEON on aarch64 only and hoping to thereby avoid a runtime test. In short, I think the critical part of 0002 needs to look more like this: +#elif defined(__aarch64__) && defined(__ARM_NEON) +/* + * We use the Neon instructions if the compiler provides access to them + * (as indicated by __ARM_NEON) and we are on aarch64. While Neon support is + * technically optional for aarch64, it appears that all available 64-bit + * hardware does have it. Neon exists in some 32-bit hardware too, but + * we could not realistically use it there without a run-time check, + * which seems not worth the trouble for now. + */ +#include +#define USE_NEON ... Coding like this appears to work on both my Apple M1 and my Raspberry Pi, with several different OSes checked on the latter. 
regards, tom lane [1] https://developer.arm.com/documentation/101754/0618/armclang-Reference/Other-Compiler-specific-Features/Predefined-macros [2] http://micro-os-plus.github.io/develop/predefined-macros/
[Commitfest 2022-09] Begins This Thursday
Hi all, Just a reminder that September 2022 commitfest will begin this coming Thursday, September 1. As of now, there have been “267” patches in total. Out of these 267 patches, “22” patches required committer attention. Unfortunately, only three patches have a committer. I think the author needs to find a committer, or the committer needs to look at the patches. 1. Fix assertion failure with barriers in parallel hash join 2. pg_dump - read data for some options from external file 3. Add non-blocking version of PQcancel 4. use has_privs_of_role() for pg_hba.conf (jconway) 5. explain analyze rows=%.0f 6. Allow pageinspect's bt_page_stats function to return a set of rows instead of a single row 7. pg_stat_statements: Track statement entry timestamp 8. Add connection active, idle time to pg_stat_activity 9. Add Amcheck option for checking unique constraints in btree indexes 10. jit_warn_above_fraction parameter 11. fix spinlock contention in LogwrtResult 12. Faster pglz compression (fuzzycz) 13. Parallel Hash Full Join (macdice) 14. KnownAssignedXidsGetAndSetXmin performance 15. psql - refactor echo code 16. Use "WAL segment" instead of "log segment" consistently in user-facing messages 17. Avoid erroring out when unable to remove or parse logical rewrite files to save checkpoint work 18. pg_receivewal fail to streams when the partial file to write is not fully initialized present in the wal receiver directory 19. On client login event trigger 20. Update relfrozenxmin when truncating temp tables 21. XID formatting and SLRU refactorings (Independent part of: Add 64-bit XIDs into PostgreSQL 15) 22. Unit tests for SLRU Currently, we have 31 patches which require the author's attention. If you already fixed and replied, change the status. I'll send out reminders this week to get your patches registered/rebased, I'll update stale statuses in the CF app. Thanks, -- Ibrar Ahmed.
Re: [PATCH] Add native windows on arm64 support
Hi, On 2022-08-27 11:33:51 +0900, Michael Paquier wrote: > FWIW, I would not mind re-enabling that on HEAD, as of something like the > attached. I have done a dozen of runs without seeing a test failure, and > knowing that we don't support anything older than Win10 makes me feel safer > about this change. Any objections? +1. We don't have a choice about it on other architectures and it seems like a bad idea to disable on its own. I checked, and it looks like we didn't add --disable-dynamicbase, so ASLR wasn't disabled for mingw builds. Greetings, Andres Freund
Re: SQL/JSON features for v15
On 8/26/22 4:36 PM, Andrew Dunstan wrote: On 2022-08-26 Fr 16:11, Nikita Glukhov wrote: Hi, On 26.08.2022 22:25, Andrew Dunstan wrote: On 2022-08-24 We 20:05, Nikita Glukhov wrote: v8 - is a highly WIP patch, which I failed to finish today. Even some test cases fail now, and they simply show unfinished things like casts to bytea (they can be simply removed) and missing safe input functions. Thanks for your work, please keep going. I have completed in v9 all the things I previously planned: - Added missing safe I/O and type conversion functions for datetime, float4, varchar, bpchar. This introduces a lot of boilerplate code for returning errors and also maybe adds some overhead. - Added JSON_QUERY coercion to UTF8 bytea using pg_convert_to(). - Added immutability checks that were missed with elimination of coercion expressions. Coercions text::datetime, datetime1::datetime2 and even datetime::text for some datetime types are mutable. datetime::text can be made immutable by passing ISO date style into output functions (like in jsonpath). - Disabled non-Const expressions in DEFAULT ON EMPTY in non ERROR ON ERROR case. Non-constant expressions are tried to evaluate into Const directly inside transformExpr(). Maybe it would be better to simply remove DEFAULT ON EMPTY. Yes, I think that's what I suggested upthread. I don't think DEFAULT ON EMPTY matters that much, and we can revisit it for release 16. If it's simpler please do it that way. It is possible to easily split this patch into several subpatches, I will do it if needed. Thanks, probably a good idea but I will start reviewing what you have now. Andres and others please chime in if you can. Thanks Nikita! I looked through the tests to see if we would need any doc changes, e.g. in [1]. I noticed that this hint: "HINT: Use ERROR ON ERROR clause or try to simplify expression into constant-like form" lacks a period on the end, which is convention. 
I don't know if the SQL/JSON standard calls out if domains should be castable, but if it does, we should document in [1] that we are not currently supporting them as return types, so that we're only supporting "constant-like" expressions with examples. Looking forward to hearing other feedback. Thanks, Jonathan [1] https://www.postgresql.org/docs/15/functions-json.html#FUNCTIONS-SQLJSON OpenPGP_signature Description: OpenPGP digital signature
Re: First draft of the PG 15 release notes
Hi, I noticed a stray "DETAILS?" marker while going through the release notes for 15. Is that subsection still under construction or review? > > > Record and check the collation of each database (Peter Eisentraut) > [...] > to match the operating system collation version. DETAILS? > > Kind regards, Matthias van de Meent
Re: [BUG] Logical replication failure "ERROR: could not map filenode "base/13237/442428" to relation OID" with catalog modifying txns
On Sat, Aug 27, 2022 at 7:24 PM Amit Kapila wrote: > > On Sat, Aug 27, 2022 at 1:06 PM Masahiko Sawada wrote: > > > > On Sat, Aug 27, 2022 at 3:56 PM Amit Kapila wrote: > > > > > > On Fri, Jul 29, 2022 at 12:15 PM Amit Kapila > > > wrote: > > > > > > > > > > > > > > Yeah, your description makes sense to me. I've also considered how to > > > > > hit this path but I guess it is never hit. Thinking of it in another > > > > > way, first of all, at least 2 catalog modifying transactions have to > > > > > be running while writing a xl_running_xacts. The serialized snapshot > > > > > that is written when we decode the first xl_running_xact has two > > > > > transactions. Then, one of them is committed before the second > > > > > xl_running_xacts. The second serialized snapshot has only one > > > > > transaction. Then, the transaction is also committed after that. Now, > > > > > in order to execute the path, we need to start decoding from the first > > > > > serialized snapshot. However, if we start from there, we cannot decode > > > > > the full contents of the transaction that was committed later. > > > > > > > > > > > > > I think then we should change this code in the master branch patch > > > > with an additional comment on the lines of: "Either all the xacts got > > > > purged or none. It is only possible to partially remove the xids from > > > > this array if one or more of the xids are still running but not all. > > > > That can happen if we start decoding from a point (LSN where the > > > > snapshot state became consistent) where all the xacts in this were > > > > running and then at least one of those got committed and a few are > > > > still running. We will never start from such a point because we won't > > > > move the slot's restart_lsn past the point where the oldest running > > > > transaction's restart_decoding_lsn is." > > > > > > > > > > Unfortunately, this theory doesn't turn out to be true. 
While > > > investigating the latest buildfarm failure [1], I see that it is > > > possible that only part of the xacts in the restored catalog modifying > > > xacts list needs to be purged. See the attached where I have > > > demonstrated it via a reproducible test. It seems the point we were > > > missing was that to start from a point where two or more catalog > > > modifying were serialized, it requires another open transaction before > > > both get committed, and then we need the checkpoint or other way to > > > force running_xacts record in-between the commit of initial two > > > catalog modifying xacts. There could possibly be other ways as well > > > but the theory above wasn't correct. > > > > > > > Thank you for the analysis and the patch. I have the same conclusion. > > Since we took this approach only on the master the back branches are > > not affected. > > > > The new test scenario makes sense to me and looks better than the one > > I have. Regarding the fix, I think we should use > > TransactionIdFollowsOrEquals() instead of > > NormalTransactionIdPrecedes(): > > > > + for (off = 0; off < builder->catchange.xcnt; off++) > > + { > > + if (NormalTransactionIdPrecedes(builder->catchange.xip[off], > > + builder->xmin)) > > + break; > > + } > > > > Right, fixed. Thank you for updating the patch! It looks good to me. Regards, -- Masahiko Sawada EDB: https://www.enterprisedb.com/
Re: Use array as object (src/fe_utils/parallel_slot.c)
Em sáb., 27 de ago. de 2022 às 00:00, Michael Paquier escreveu: > On Fri, Aug 26, 2022 at 01:54:26PM -0300, Ranier Vilela wrote: > > Is it worth creating a commiffest? > > Don't think so, but feel free to create one and mark me as committer > if you think that's appropriate. I have marked this thread as > something to do soon-ishly Hi Michael, I see the commit. Thanks for the hardest part. Suspecting something wrong is easy, the difficult thing is to describe why it is wrong. , but I am being distracted by life this > month. > Glad to know, enjoy. regards, Ranier Vilela
Re: [BUG] Logical replication failure "ERROR: could not map filenode "base/13237/442428" to relation OID" with catalog modifying txns
On Sat, Aug 27, 2022 at 1:06 PM Masahiko Sawada wrote: > > On Sat, Aug 27, 2022 at 3:56 PM Amit Kapila wrote: > > > > On Fri, Jul 29, 2022 at 12:15 PM Amit Kapila > > wrote: > > > > > > > > > > > Yeah, your description makes sense to me. I've also considered how to > > > > hit this path but I guess it is never hit. Thinking of it in another > > > > way, first of all, at least 2 catalog modifying transactions have to > > > > be running while writing a xl_running_xacts. The serialized snapshot > > > > that is written when we decode the first xl_running_xact has two > > > > transactions. Then, one of them is committed before the second > > > > xl_running_xacts. The second serialized snapshot has only one > > > > transaction. Then, the transaction is also committed after that. Now, > > > > in order to execute the path, we need to start decoding from the first > > > > serialized snapshot. However, if we start from there, we cannot decode > > > > the full contents of the transaction that was committed later. > > > > > > > > > > I think then we should change this code in the master branch patch > > > with an additional comment on the lines of: "Either all the xacts got > > > purged or none. It is only possible to partially remove the xids from > > > this array if one or more of the xids are still running but not all. > > > That can happen if we start decoding from a point (LSN where the > > > snapshot state became consistent) where all the xacts in this were > > > running and then at least one of those got committed and a few are > > > still running. We will never start from such a point because we won't > > > move the slot's restart_lsn past the point where the oldest running > > > transaction's restart_decoding_lsn is." > > > > > > > Unfortunately, this theory doesn't turn out to be true. While > > investigating the latest buildfarm failure [1], I see that it is > > possible that only part of the xacts in the restored catalog modifying > > xacts list needs to be purged. 
See the attached where I have > > demonstrated it via a reproducible test. It seems the point we were > > missing was that to start from a point where two or more catalog > > modifying were serialized, it requires another open transaction before > > both get committed, and then we need the checkpoint or other way to > > force running_xacts record in-between the commit of initial two > > catalog modifying xacts. There could possibly be other ways as well > > but the theory above wasn't correct. > > > > Thank you for the analysis and the patch. I have the same conclusion. > Since we took this approach only on the master the back branches are > not affected. > > The new test scenario makes sense to me and looks better than the one > I have. Regarding the fix, I think we should use > TransactionIdFollowsOrEquals() instead of > NormalTransactionIdPrecedes(): > > + for (off = 0; off < builder->catchange.xcnt; off++) > + { > + if (NormalTransactionIdPrecedes(builder->catchange.xip[off], > + builder->xmin)) > + break; > + } > Right, fixed. -- With Regards, Amit Kapila. v2-0001-Fix-the-incorrect-assertion-introduced-in-commit-.patch Description: Binary data
Re: Avoid erroring out when unable to remove or parse logical rewrite files to save checkpoint work
On Thu, Aug 25, 2022 at 5:51 PM Michael Paquier wrote: > > FWIW, the changes in reorderbuffer.c for > ReorderBufferCleanupSerializedTXNs() reduce the code readability, in > my opinion, so that's one less argument in favor of this change. Agreed. I reverted the changes. > The gain in ParseConfigDirectory() is kind of cool. > pg_tzenumerate_next(), copydir(), RemoveXlogFile() > StartupReplicationSlots(), CheckPointLogicalRewriteHeap() and > RemovePgTempFilesInDir() seem fine, as well. At least these avoid > extra lstat() calls when the file type is unknown, which would be only > a limited number of users where some of the three DT_* are missing > (right?). Yes, the idea is to avoid lstat() system calls when possible. PSA v17 patch with reorderbuffer.c changes reverted. -- Bharath Rupireddy RDS Open Source Databases: https://aws.amazon.com/rds/postgresql/ v17-0001-Make-more-use-of-get_dirent_type-in-place-of-sta.patch Description: Binary data
Re: pg_stat_wal: tracking the compression effect
On Fri, Aug 26, 2022 at 8:39 AM Kyotaro Horiguchi wrote: > > At Fri, 26 Aug 2022 11:55:27 +0900 (JST), Kyotaro Horiguchi > wrote in > > At Thu, 25 Aug 2022 16:04:50 +0900, Ken Kato > > wrote in > > > Accumulating the values, which indicates how much space is saved by > > > each compression (size before compression - size after compression), > > > and keep track of how many times compression has happened. So that one > > > can know how much space is saved on average. > > > > Honestly, I don't think its useful much. > > How about adding them to pg_waldump and pg_walinspect instead? > > > > # It further widens the output of pg_waldump, though.. > > Sorry, that was apparently too short. > > I know you already see that in per-record output of pg_waldump, but > maybe we need the summary of saved bytes in "pg_waldump -b -z" output > and the corresponding output of pg_walinspect. +1 for adding compression stats such as type and saved bytes to pg_waldump and pg_walinspect given that the WAL records already have the saved bytes info. Collecting them in the server via pg_stat_wal will require some extra effort, for instance, every WAL record insert requires that code to be executed. When users want to analyze the compression efforts they can either use pg_walinspect or pg_waldump and change the compression type if required. -- Bharath Rupireddy RDS Open Source Databases: https://aws.amazon.com/rds/postgresql/
Re: [BUG] Logical replication failure "ERROR: could not map filenode "base/13237/442428" to relation OID" with catalog modifying txns
On Sat, Aug 27, 2022 at 3:56 PM Amit Kapila wrote: > > On Fri, Jul 29, 2022 at 12:15 PM Amit Kapila wrote: > > > > > > > > Yeah, your description makes sense to me. I've also considered how to > > > hit this path but I guess it is never hit. Thinking of it in another > > > way, first of all, at least 2 catalog modifying transactions have to > > > be running while writing a xl_running_xacts. The serialized snapshot > > > that is written when we decode the first xl_running_xact has two > > > transactions. Then, one of them is committed before the second > > > xl_running_xacts. The second serialized snapshot has only one > > > transaction. Then, the transaction is also committed after that. Now, > > > in order to execute the path, we need to start decoding from the first > > > serialized snapshot. However, if we start from there, we cannot decode > > > the full contents of the transaction that was committed later. > > > > > > > I think then we should change this code in the master branch patch > > with an additional comment on the lines of: "Either all the xacts got > > purged or none. It is only possible to partially remove the xids from > > this array if one or more of the xids are still running but not all. > > That can happen if we start decoding from a point (LSN where the > > snapshot state became consistent) where all the xacts in this were > > running and then at least one of those got committed and a few are > > still running. We will never start from such a point because we won't > > move the slot's restart_lsn past the point where the oldest running > > transaction's restart_decoding_lsn is." > > > > Unfortunately, this theory doesn't turn out to be true. While > investigating the latest buildfarm failure [1], I see that it is > possible that only part of the xacts in the restored catalog modifying > xacts list needs to be purged. See the attached where I have > demonstrated it via a reproducible test. 
It seems the point we were > missing was that to start from a point where two or more catalog > modifying were serialized, it requires another open transaction before > both get committed, and then we need the checkpoint or other way to > force running_xacts record in-between the commit of initial two > catalog modifying xacts. There could possibly be other ways as well > but the theory above wasn't correct. > Thank you for the analysis and the patch. I have the same conclusion. Since we took this approach only on the master the back branches are not affected. The new test scenario makes sense to me and looks better than the one I have. Regarding the fix, I think we should use TransactionIdFollowsOrEquals() instead of NormalTransactionIdPrecedes(): + for (off = 0; off < builder->catchange.xcnt; off++) + { + if (NormalTransactionIdPrecedes(builder->catchange.xip[off], + builder->xmin)) + break; + } Regards, -- Masahiko Sawada EDB: https://www.enterprisedb.com/
Re: windows cfbot failing: my_perl
On Sat, Aug 27, 2022 at 2:23 PM Andres Freund wrote: > > Hi, > > On 2022-08-27 12:53:24 +0700, John Naylor wrote: > > Update: I tried taking the CI for a spin, but ran into IT issues with > > Github when I tried to push my branch to remote. > > A github, not a CI issue? Just making sure... Yeah, I forked PG from the Github page, cloned it locally, applied the patch and tried to push to origin. > As a workaround you can just open a CF entry, that'll run the patch soon. Yeah, I did that after taking a break -- there are compiler warnings for contrib/sepgsql/label.c where pfree's argument is cast to void *, so seems unrelated. > But either way, I ran the patch "manually" in a windows VM that I had running > anyway. With the meson patchset, but I don't see how it could matter here. > > 1/5 postgresql:setup / tmp_install OK > 1.30s > 2/5 postgresql:jsonb_plperl / jsonb_plperl/regress OK > 8.30s > 3/5 postgresql:bool_plperl / bool_plperl/regressOK > 8.30s > 4/5 postgresql:hstore_plperl / hstore_plperl/regressOK > 8.64s > 5/5 postgresql:plperl / plperl/regress OK > 10.41s > > Ok: 5 > > > I didn't test other platforms. > > > WRT the patch's commit message: The issue isn't that perl's free() is > redefined, it's that perl's #define free (which references perl globals!) > breaks windows' header... Ah, thanks for that detail and for testing, will push. -- John Naylor EDB: http://www.enterprisedb.com
Re: windows cfbot failing: my_perl
Hi, On 2022-08-27 12:53:24 +0700, John Naylor wrote: > On Sat, Aug 27, 2022 at 11:20 AM John Naylor > wrote: > > > > Here's a patch with that idea, not tested on Windows yet. > > Update: I tried taking the CI for a spin, but ran into IT issues with > Github when I tried to push my branch to remote. A github, not a CI issue? Just making sure... As a workaround you can just open a CF entry, that'll run the patch soon. But either way, I ran the patch "manually" in a windows VM that I had running anyway. With the meson patchset, but I don't see how it could matter here. 1/5 postgresql:setup / tmp_install OK 1.30s 2/5 postgresql:jsonb_plperl / jsonb_plperl/regress OK 8.30s 3/5 postgresql:bool_plperl / bool_plperl/regressOK 8.30s 4/5 postgresql:hstore_plperl / hstore_plperl/regressOK 8.64s 5/5 postgresql:plperl / plperl/regress OK 10.41s Ok: 5 I didn't test other platforms. WRT the patch's commit message: The issue isn't that perl's free() is redefined, it's that perl's #define free (which references perl globals!) breaks windows' header... Greetings, Andres Freund
Re: windows cfbot failing: my_perl
On 2022-08-26 23:02:06 -0400, Tom Lane wrote: > John Naylor writes: > > On Sat, Aug 27, 2022 at 4:15 AM Andres Freund wrote: > >> I think it'd also be needed in hstore_plperl.c, jsonb_plperl.c. Putting the > >> include in plperl.h would keep that aspect transparent, because > >> plperl_utils.h > >> includes plperl.h. > > > Since plperl_helpers.h already includes plperl.h, I'm not sure why > > both are included everywhere the former is. If .c/.xs files didn't > > include plperl.h directly, we could keep pg_wchar.h in > > plperl_helpers.h. Not sure if that's workable or any better... > > Maybe we should flush the separate plperl_helpers.h header and just > put those static-inline functions in plperl.h. +1
Re: use ARM intrinsics in pg_lfind32() where available
On Sat, Aug 27, 2022 at 1:24 AM Nathan Bossart wrote: > > Here is a rebased patch set that applies to HEAD. 0001: #define USE_NO_SIMD typedef uint64 Vector8; +typedef uint64 Vector32; #endif I don't foresee any use of emulating vector registers with uint64 if they only hold two ints. I wonder if it'd be better if all vector32 functions were guarded with #ifndef USE_NO_SIMD. (I wonder if declarations without definitions cause warnings...) + * NB: This function assumes that each lane in the given vector either has all + * bits set or all bits zeroed, as it is mainly intended for use with + * operations that produce such vectors (e.g., vector32_eq()). If this + * assumption is not true, this function's behavior is undefined. + */ Hmm? Also, is_highbit_set() already uses the same intrinsic and has the same intended effect, since we only care about the boolean result. 0002: -#elif defined(USE_SSE2) +#elif defined(USE_SSE2) || defined(USE_NEON) I think we can just say #else. -#if defined(USE_SSE2) - __m128i sub; +#ifndef USE_NO_SIMD + Vector8 sub; +#elif defined(USE_NEON) + + /* use the same approach as the USE_SSE2 block above */ + sub = vqsubq_u8(v, vector8_broadcast(c)); + result = vector8_has_zero(sub); I think we should invent a helper that does saturating subtraction and call that, inlining the sub var so we don't need to mess with it further. Otherwise seems fine. -- John Naylor EDB: http://www.enterprisedb.com
Re: [BUG] Logical replication failure "ERROR: could not map filenode "base/13237/442428" to relation OID" with catalog modifying txns
On Fri, Jul 29, 2022 at 12:15 PM Amit Kapila wrote: > > > > > Yeah, your description makes sense to me. I've also considered how to > > hit this path but I guess it is never hit. Thinking of it in another > > way, first of all, at least 2 catalog modifying transactions have to > > be running while writing a xl_running_xacts. The serialized snapshot > > that is written when we decode the first xl_running_xact has two > > transactions. Then, one of them is committed before the second > > xl_running_xacts. The second serialized snapshot has only one > > transaction. Then, the transaction is also committed after that. Now, > > in order to execute the path, we need to start decoding from the first > > serialized snapshot. However, if we start from there, we cannot decode > > the full contents of the transaction that was committed later. > > > > I think then we should change this code in the master branch patch > with an additional comment on the lines of: "Either all the xacts got > purged or none. It is only possible to partially remove the xids from > this array if one or more of the xids are still running but not all. > That can happen if we start decoding from a point (LSN where the > snapshot state became consistent) where all the xacts in this were > running and then at least one of those got committed and a few are > still running. We will never start from such a point because we won't > move the slot's restart_lsn past the point where the oldest running > transaction's restart_decoding_lsn is." > Unfortunately, this theory doesn't turn out to be true. While investigating the latest buildfarm failure [1], I see that it is possible that only part of the xacts in the restored catalog modifying xacts list needs to be purged. See the attached where I have demonstrated it via a reproducible test. 
It seems the point we were missing was that to start from a point where two or more catalog modifying were serialized, it requires another open transaction before both get committed, and then we need the checkpoint or other way to force running_xacts record in-between the commit of initial two catalog modifying xacts. There could possibly be other ways as well but the theory above wasn't correct. [1] - https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=curculio=2022-08-25%2004%3A15%3A34 -- With Regards, Amit Kapila. diff --git a/contrib/test_decoding/expected/catalog_change_snapshot.out b/contrib/test_decoding/expected/catalog_change_snapshot.out index dc4f9b7..d2a4bdf 100644 --- a/contrib/test_decoding/expected/catalog_change_snapshot.out +++ b/contrib/test_decoding/expected/catalog_change_snapshot.out @@ -1,4 +1,4 @@ -Parsed test spec with 2 sessions +Parsed test spec with 3 sessions starting permutation: s0_init s0_begin s0_savepoint s0_truncate s1_checkpoint s1_get_changes s0_commit s0_begin s0_insert s1_checkpoint s1_get_changes s0_commit s1_get_changes step s0_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); @@ -42,3 +42,57 @@ COMMIT stop (1 row) + +starting permutation: s0_init s0_begin s0_truncate s2_begin s2_truncate s1_checkpoint s1_get_changes s0_commit s0_begin s0_insert s1_checkpoint s1_get_changes s2_commit s1_checkpoint s1_get_changes s0_commit s1_get_changes +step s0_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? 
+ +init +(1 row) + +step s0_begin: BEGIN; +step s0_truncate: TRUNCATE tbl1; +step s2_begin: BEGIN; +step s2_truncate: TRUNCATE tbl2; +step s1_checkpoint: CHECKPOINT; +step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); +data + +(0 rows) + +step s0_commit: COMMIT; +step s0_begin: BEGIN; +step s0_insert: INSERT INTO tbl1 VALUES (1); +step s1_checkpoint: CHECKPOINT; +step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); +data +--- +BEGIN +table public.tbl1: TRUNCATE: (no-flags) +COMMIT +(3 rows) + +step s2_commit: COMMIT; +step s1_checkpoint: CHECKPOINT; +step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); +data +--- +BEGIN +table public.tbl2: TRUNCATE: (no-flags) +COMMIT +(3 rows) + +step s0_commit: COMMIT; +step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); +data +- +BEGIN