Hi,

In one of our environments, PostgreSQL 12 was upgraded to PostgreSQL 13.1 using pg_upgrade with hard links. The OS was Ubuntu 16.04.7 LTS (Xenial Xerus). Right after the upgrade to PG 13, pg_repack was used to rebuild all the tables in the database.
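For reference, the upgrade and the subsequent repack were run roughly along the following lines (the binary/data directory paths and the database name below are illustrative, not the exact ones from that environment):

    /usr/lib/postgresql/13/bin/pg_upgrade \
        --old-bindir=/usr/lib/postgresql/12/bin \
        --new-bindir=/usr/lib/postgresql/13/bin \
        --old-datadir=/var/lib/postgresql/12/main \
        --new-datadir=/var/lib/postgresql/13/main \
        --link

    # afterwards, rebuild every table in the database with pg_repack
    pg_repack --dbname=mydb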
A new server with Ubuntu 20.04.1 LTS was later provisioned, and streaming replication was set up from the old server on Ubuntu 16 to the new server on Ubuntu 20, both on the same PostgreSQL version, 13.1. Replication was running fine, but after failover to the new server, an UPDATE on a few random rows (not on the same page) caused a segmentation fault and crashed Postgres. Selecting those records, either via the index or directly from the table, works absolutely fine, but updating the same records produces the following error:

2021-03-12 17:20:01.979 CET p#7 s#604b8fa9.7 t#0 LOG: terminating any other active server processes
2021-03-12 17:20:01.979 CET p#41 s#604b9212.29 t#0 WARNING: terminating connection because of crash of another server process
2021-03-12 17:20:01.979 CET p#41 s#604b9212.29 t#0 DETAIL: The postmaster has commanded this server process to roll back the current transaction and exit, because another server process exited abnormally and possibly corrupted shared memory.
2021-03-12 17:20:01.979 CET p#41 s#604b9212.29 t#0 HINT: In a moment you should be able to reconnect to the database and repeat your command.

With debug symbols installed, the gdb backtrace looks like the following:

(gdb) bt
#0  __memmove_avx_unaligned_erms () at ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:533
#1  0x000055b72761c370 in memmove (__len=<optimized out>, __src=0x55b72930e9c7, __dest=<optimized out>) at /usr/include/x86_64-linux-gnu/bits/string_fortified.h:40
#2  _bt_swap_posting (newitem=newitem@entry=0x55b7292010c0, oposting=oposting@entry=0x7f3b46f94778, postingoff=postingoff@entry=2) at ./build/../src/backend/access/nbtree/nbtdedup.c:796
#3  0x000055b72761d40b in _bt_insertonpg (rel=0x7f3acd8a49c0, itup_key=0x55b7292bc6a8, buf=507, cbuf=0, stack=0x55b7292d5f98, itup=0x55b7292010c0, itemsz=32, newitemoff=48, postingoff=2, split_only_page=false) at ./build/../src/backend/access/nbtree/nbtinsert.c:1167
#4  0x000055b72761eae9 in _bt_doinsert (rel=rel@entry=0x7f3acd8a49c0, itup=itup@entry=0x55b7292bc848, checkUnique=checkUnique@entry=UNIQUE_CHECK_NO, heapRel=heapRel@entry=0x7f3acd894f70) at ./build/../src/backend/access/nbtree/nbtinsert.c:1009
#5  0x000055b727621e2e in btinsert (rel=0x7f3acd8a49c0, values=<optimized out>, isnull=<optimized out>, ht_ctid=0x55b7292d4578, heapRel=0x7f3acd894f70, checkUnique=UNIQUE_CHECK_NO, indexInfo=0x55b7292bc238) at ./build/../src/backend/access/nbtree/nbtree.c:210
#6  0x000055b727757487 in ExecInsertIndexTuples (slot=slot@entry=0x55b7292d4548, estate=estate@entry=0x55b7291ff1f8, noDupErr=noDupErr@entry=false, specConflict=specConflict@entry=0x0, arbiterIndexes=arbiterIndexes@entry=0x0) at ./build/../src/backend/executor/execIndexing.c:393
#7  0x000055b7277807a8 in ExecUpdate (mtstate=0x55b7292bb2c8, tupleid=0x7fff45ea318a, oldtuple=0x0, slot=0x55b7292d4548, planSlot=0x55b7292c04e8, epqstate=0x55b7292bb3c0, estate=0x55b7291ff1f8, canSetTag=true) at ./build/../src/backend/executor/nodeModifyTable.c:1479
#8  0x000055b727781655 in ExecModifyTable (pstate=0x55b7292bb2c8) at ./build/../src/backend/executor/nodeModifyTable.c:2253
#9  0x000055b727758424 in ExecProcNode (node=0x55b7292bb2c8) at ./build/../src/include/executor/executor.h:248
#10 ExecutePlan (execute_once=<optimized out>, dest=0x55b7292c1728, direction=<optimized out>, numberTuples=0, sendTuples=<optimized out>, operation=CMD_UPDATE, use_parallel_mode=<optimized out>, planstate=0x55b7292bb2c8, estate=0x55b7291ff1f8) at ./build/../src/backend/executor/execMain.c:1632
#11 standard_ExecutorRun (queryDesc=0x55b7292ba578, direction=<optimized out>, count=0, execute_once=<optimized out>) at ./build/../src/backend/executor/execMain.c:350
#12 0x000055b7278bebf7 in ProcessQuery (plan=<optimized out>, sourceText=0x55b72919efa8 "\031)\267U", params=0x0, queryEnv=0x0, dest=0x55b7292c1728, qc=0x7fff45ea34c0) at ./build/../src/backend/tcop/pquery.c:160
#13 0x000055b7278bedf9 in PortalRunMulti (portal=portal@entry=0x55b729254128, isTopLevel=isTopLevel@entry=true, setHoldSnapshot=setHoldSnapshot@entry=false, dest=dest@entry=0x55b7292c1728, altdest=altdest@entry=0x55b7292c1728, qc=qc@entry=0x7fff45ea34c0) at ./build/../src/backend/tcop/pquery.c:1265
#14 0x000055b7278bf847 in PortalRun (portal=portal@entry=0x55b729254128, count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=true, run_once=run_once@entry=true, dest=dest@entry=0x55b7292c1728,
--Type <RET> for more, q to quit, c to continue without paging--

Is this expected when replication happens between PostgreSQL instances hosted on different OS versions, like Ubuntu 16 and Ubuntu 20? Or do we think this is some sort of corruption?

--
Regards,
Avi.
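P.S. To help confirm whether this is index corruption, would it be reasonable to run amcheck against all the btree indexes on the new server, roughly as below? (The database name is a placeholder; heapallindexed => true also re-verifies every heap tuple against the index, so it can be slow on large tables.)

    psql -d mydb -c "CREATE EXTENSION IF NOT EXISTS amcheck"

    # check every valid, persistent btree index, including heap vs. index consistency
    psql -d mydb -c "
      SELECT c.relname, bt_index_check(index => c.oid, heapallindexed => true)
      FROM pg_index i
      JOIN pg_class c ON i.indexrelid = c.oid
      JOIN pg_am am ON c.relam = am.oid
      WHERE am.amname = 'btree'
        AND c.relpersistence = 'p'
        AND i.indisready AND i.indisvalid;"

Or is there a better way to tell whether the indexes themselves are corrupted in this situation?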