From cc5272cb4acd864482fa0f5ca051e4e1d659c32e Mon Sep 17 00:00:00 2001
From: Jakub Wartak <jakub.wartak@enterprisedb.com>
Date: Fri, 21 Feb 2025 10:19:35 +0100
Subject: [PATCH v8 1/3] Add optional dependency to libnuma (Linux-only) for
 basic NUMA awareness routines and add minimal src/port/pg_numa.c portability
 wrapper.

Other platforms can be added later.

libnuma is unavailable on 32-bit builds, so due to the lack of an i386 shared
object we disable it there (it would not make much sense anyway, as i386 is
very memory-limited even with PAE).

Author: Jakub Wartak <jakub.wartak@enterprisedb.com>
Co-authored-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CAKZiRmxh6KWo0aqRqvmcoaX2jUxZYb4kGp3N%3Dq1w%2BDiH-696Xw%40mail.gmail.com
---
 .cirrus.tasks.yml              |   7 +-
 configure                      |  87 +++++++++++++++++++
 configure.ac                   |  13 +++
 doc/src/sgml/installation.sgml |  20 +++++
 meson.build                    |  17 ++++
 meson_options.txt              |   3 +
 src/Makefile.global.in         |   1 +
 src/backend/Makefile           |   3 +
 src/include/pg_config.h.in     |   3 +
 src/include/port/pg_numa.h     |  43 ++++++++++
 src/makefiles/meson.build      |   3 +
 src/port/Makefile              |   1 +
 src/port/meson.build           |   1 +
 src/port/pg_numa.c             | 150 +++++++++++++++++++++++++++++++++
 14 files changed, 351 insertions(+), 1 deletion(-)
 create mode 100644 src/include/port/pg_numa.h
 create mode 100644 src/port/pg_numa.c

diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml
index 91b51142d2..584e3e5a44 100644
--- a/.cirrus.tasks.yml
+++ b/.cirrus.tasks.yml
@@ -428,6 +428,8 @@ task:
     DEBIAN_FRONTEND=noninteractive apt-get -y install \
       libcurl4-openssl-dev \
       libcurl4-openssl-dev:i386 \
+      libnuma1 \
+      libnuma-dev
 
   matrix:
     - name: Linux - Debian Bookworm - Autoconf
@@ -448,6 +450,7 @@ task:
             --enable-cassert --enable-injection-points --enable-debug \
             --enable-tap-tests --enable-nls \
             --with-segsize-blocks=6 \
+            --with-libnuma \
             \
             ${LINUX_CONFIGURE_FEATURES} \
             \
@@ -492,6 +495,7 @@ task:
             -Dllvm=disabled \
             --pkg-config-path /usr/lib/i386-linux-gnu/pkgconfig/ \
             -DPERL=perl5.36-i386-linux-gnu \
+            -Dlibnuma=disabled \
             build-32
         EOF
 
@@ -804,7 +808,8 @@ task:
 
   setup_additional_packages_script: |
     apt-get update
-    DEBIAN_FRONTEND=noninteractive apt-get -y install libcurl4-openssl-dev
+    DEBIAN_FRONTEND=noninteractive apt-get -y install libcurl4-openssl-dev \
+      libnuma1 libnuma-dev
 
   ###
   # Test that code can be built with gcc/clang without warnings
diff --git a/configure b/configure
index 93fddd6998..23c33dd997 100755
--- a/configure
+++ b/configure
@@ -711,6 +711,7 @@ with_libxml
 LIBCURL_LIBS
 LIBCURL_CFLAGS
 with_libcurl
+with_libnuma
 with_uuid
 with_readline
 with_systemd
@@ -868,6 +869,7 @@ with_libedit_preferred
 with_uuid
 with_ossp_uuid
 with_libcurl
+with_libnuma
 with_libxml
 with_libxslt
 with_system_tzdata
@@ -1581,6 +1583,7 @@ Optional Packages:
   --with-uuid=LIB         build contrib/uuid-ossp using LIB (bsd,e2fs,ossp)
   --with-ossp-uuid        obsolete spelling of --with-uuid=ossp
   --with-libcurl          build with libcurl support
+  --with-libnuma          build with libnuma support
   --with-libxml           build with XML support
   --with-libxslt          use XSLT support when building contrib/xml2
   --with-system-tzdata=DIR
@@ -9140,6 +9143,33 @@ fi
 
 
 
+#
+# NUMA 
+#
+
+
+
+# Check whether --with-libnuma was given.
+if test "${with_libnuma+set}" = set; then :
+  withval=$with_libnuma;
+  case $withval in
+    yes)
+
+$as_echo "#define USE_LIBNUMA 1" >>confdefs.h
+
+      ;;
+    no)
+      :
+      ;;
+    *)
+      as_fn_error $? "no argument expected for --with-libnuma option" "$LINENO" 5
+      ;;
+  esac
+
+else
+  with_libnuma=no
+
+fi
 
 
 
@@ -12378,6 +12408,63 @@ fi
 
 fi
 
+if test "$with_libnuma" = yes ; then
+
+  ac_fn_c_check_header_mongrel "$LINENO" "numa.h" "ac_cv_header_numa_h" "$ac_includes_default"
+if test "x$ac_cv_header_numa_h" = xyes; then :
+
+else
+  as_fn_error $? "header file <numa.h> is required for --with-libnuma" "$LINENO" 5
+fi
+
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for numa_available in -lnuma" >&5
+$as_echo_n "checking for numa_available in -lnuma... " >&6; }
+if ${ac_cv_lib_numa_numa_available+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lnuma  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char numa_available ();
+int
+main ()
+{
+return numa_available ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_numa_numa_available=yes
+else
+  ac_cv_lib_numa_numa_available=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_numa_numa_available" >&5
+$as_echo "$ac_cv_lib_numa_numa_available" >&6; }
+if test "x$ac_cv_lib_numa_numa_available" = xyes; then :
+
+  LIBS="-lnuma $LIBS"
+
+else
+  as_fn_error $? "library 'numa' does not provide numa_available" "$LINENO" 5
+fi
+
+fi
+
 # XXX libcurl must link after libgssapi_krb5 on FreeBSD to avoid segfaults
 # during gss_acquire_cred(). This is possibly related to Curl's Heimdal
 # dependency on that platform?
diff --git a/configure.ac b/configure.ac
index b6d02f5ecc..1a394dfc07 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1041,6 +1041,19 @@ if test "$with_libcurl" = yes ; then
 fi
 
 
+#
+# libnuma
+#
+AC_MSG_CHECKING([whether to build with libnuma support])
+PGAC_ARG_BOOL(with, libnuma, no, [use libnuma for NUMA awareness],
+              [AC_DEFINE([USE_LIBNUMA], 1, [Define to build with NUMA awareness support. (--with-libnuma)])])
+AC_MSG_RESULT([$with_libnuma])
+AC_SUBST(with_libnuma)
+
+if test "$with_libnuma" = yes ; then
+  AC_CHECK_LIB(numa,    numa_available, [], [AC_MSG_ERROR([library 'libnuma' is required for NUMA awareness])])
+fi
+
 #
 # XML
 #
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index e076cefa3b..79203e45a8 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -1156,6 +1156,16 @@ build-postgresql:
        </listitem>
       </varlistentry>
 
+      <varlistentry id="configure-option-with-libnuma">
+       <term><option>--with-libnuma</option></term>
+       <listitem>
+        <para>
+         Build with libnuma to enable basic NUMA support.
+         Only supported on Linux.
+        </para>
+       </listitem>
+      </varlistentry>
+
       <varlistentry id="configure-option-with-libxml">
        <term><option>--with-libxml</option></term>
        <listitem>
@@ -2611,6 +2621,16 @@ ninja install
       </listitem>
      </varlistentry>
 
+     <varlistentry id="configure-with-libnuma-meson">
+      <term><option>-Dlibnuma={ auto | enabled | disabled }</option></term>
+      <listitem>
+       <para>
+        Build with libnuma to enable basic NUMA support.
+        Only supported on Linux. The default for this option is auto.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="configure-with-libxml-meson">
       <term><option>-Dlibxml={ auto | enabled | disabled }</option></term>
       <listitem>
diff --git a/meson.build b/meson.build
index 13c13748e5..f81092eb66 100644
--- a/meson.build
+++ b/meson.build
@@ -949,6 +949,21 @@ else
 endif
 
 
+###############################################################
+# Library: libnuma
+###############################################################
+
+libnumaopt = get_option('libnuma')
+libnuma = libnumaopt.disabled() ? not_found_dep : dependency('numa', required: false)  # pkg-config probe must not be fatal
+if not libnuma.found()  # no pkg-config file (or disabled): plain library lookup decides
+  libnuma = cc.find_library('numa', required: libnumaopt)
+endif
+if libnuma.found()
+  cdata.set('USE_LIBNUMA', 1)
+else
+  libnuma = not_found_dep
+endif
+
 
 ###############################################################
 # Library: libxml
@@ -3168,6 +3183,7 @@ backend_both_deps += [
   icu_i18n,
   ldap,
   libintl,
+  libnuma,
   libxml,
   lz4,
   pam,
@@ -3823,6 +3839,7 @@ if meson.version().version_compare('>=0.57')
       'icu': icu,
       'ldap': ldap,
       'libcurl': libcurl,
+      'libnuma': libnuma,
       'libxml': libxml,
       'libxslt': libxslt,
       'llvm': llvm,
diff --git a/meson_options.txt b/meson_options.txt
index 702c451714..adaadb5faf 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -106,6 +106,9 @@ option('libcurl', type : 'feature', value: 'auto',
 option('libedit_preferred', type: 'boolean', value: false,
   description: 'Prefer BSD Libedit over GNU Readline')
 
+option('libnuma', type: 'feature', value: 'auto',
+  description: 'NUMA awareness support')
+
 option('libxml', type: 'feature', value: 'auto',
   description: 'XML support')
 
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 3b620bac5a..0bd4b2d7d3 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -191,6 +191,7 @@ with_gssapi	= @with_gssapi@
 with_krb_srvnam	= @with_krb_srvnam@
 with_ldap	= @with_ldap@
 with_libcurl	= @with_libcurl@
+with_libnuma	= @with_libnuma@
 with_libxml	= @with_libxml@
 with_libxslt	= @with_libxslt@
 with_llvm	= @with_llvm@
diff --git a/src/backend/Makefile b/src/backend/Makefile
index 42d4a28e5a..bff9f077a8 100644
--- a/src/backend/Makefile
+++ b/src/backend/Makefile
@@ -54,6 +54,9 @@ ifeq ($(with_systemd),yes)
 LIBS += -lsystemd
 endif
 
+# FIXME: should this be filtered out or made conditional on with_libnuma?
+LIBS += $(LIBNUMA_LIBS)
+
 override LDFLAGS := $(LDFLAGS) $(LDFLAGS_EX) $(LDFLAGS_EX_BE)
 
 ##########################################################################
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index db6454090d..8894f80060 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -672,6 +672,9 @@
 /* Define to 1 to build with libcurl support. (--with-libcurl) */
 #undef USE_LIBCURL
 
+/* Define to 1 to build with NUMA awareness support. (--with-libnuma) */
+#undef USE_LIBNUMA
+
 /* Define to 1 to build with XML support. (--with-libxml) */
 #undef USE_LIBXML
 
diff --git a/src/include/port/pg_numa.h b/src/include/port/pg_numa.h
new file mode 100644
index 0000000000..d3ebe8b5bd
--- /dev/null
+++ b/src/include/port/pg_numa.h
@@ -0,0 +1,43 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_numa.h
+ *	  Basic NUMA portability routines
+ *
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * 	src/include/port/pg_numa.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_NUMA_H
+#define PG_NUMA_H
+
+#include "c.h"
+
+extern PGDLLIMPORT int pg_numa_init(void);
+extern PGDLLIMPORT int pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status);
+extern PGDLLIMPORT int pg_numa_get_max_node(void);
+extern PGDLLIMPORT Size pg_numa_get_pagesize(void);
+
+#ifdef USE_LIBNUMA
+
+/*
+ * This is required on Linux before calling pg_numa_query_pages(), as the
+ * memory must be page-faulted in before move_pages(2) returns valid results.
+ */
+#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \
+	ro_volatile_var = *(uint64 *) (ptr)
+
+extern void numa_warn(int num, char *fmt,...) pg_attribute_printf(2, 3);
+extern void numa_error(char *where);
+
+#else
+
+#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \
+	do {} while(0)
+
+#endif
+
+#endif							/* PG_NUMA_H */
diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build
index 60e13d5023..f786c19160 100644
--- a/src/makefiles/meson.build
+++ b/src/makefiles/meson.build
@@ -199,6 +199,8 @@ pgxs_empty = [
   'PTHREAD_CFLAGS', 'PTHREAD_LIBS',
 
   'ICU_LIBS',
+
+  'LIBNUMA_CFLAGS', 'LIBNUMA_LIBS'
 ]
 
 if host_system == 'windows' and cc.get_argument_syntax() != 'msvc'
@@ -230,6 +232,7 @@ pgxs_deps = {
   'icu': icu,
   'ldap': ldap,
   'libcurl': libcurl,
+  'libnuma': libnuma,
   'libxml': libxml,
   'libxslt': libxslt,
   'llvm': llvm,
diff --git a/src/port/Makefile b/src/port/Makefile
index 4c22431951..a68a29d541 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -44,6 +44,7 @@ OBJS = \
 	noblock.o \
 	path.o \
 	pg_bitutils.o \
+	pg_numa.o \
 	pg_popcount_avx512.o \
 	pg_strong_random.o \
 	pgcheckdir.o \
diff --git a/src/port/meson.build b/src/port/meson.build
index 7fcfa728d4..7ffbd4d88d 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -7,6 +7,7 @@ pgport_sources = [
   'noblock.c',
   'path.c',
   'pg_bitutils.c',
+  'pg_numa.c',
   'pg_popcount_avx512.c',
   'pg_strong_random.c',
   'pgcheckdir.c',
diff --git a/src/port/pg_numa.c b/src/port/pg_numa.c
new file mode 100644
index 0000000000..db28578bca
--- /dev/null
+++ b/src/port/pg_numa.c
@@ -0,0 +1,150 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_numa.c
+ * 		Basic NUMA portability routines
+ *
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_numa.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "c.h"
+#include "postgres.h"
+#include "port/pg_numa.h"
+#include "storage/pg_shmem.h"
+#include <unistd.h>
+#ifdef WIN32
+#include <windows.h>
+#endif
+
+/*
+ * At this point we provide support only for Linux thanks to libnuma, but in
+ * future support for other platforms e.g. Win32 or FreeBSD might be possible
+ * too. For Win32 NUMA APIs see
+ * https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support
+ */
+#ifdef USE_LIBNUMA
+
+#include <numa.h>
+#include <numaif.h>
+
+/*
+ * Initialize libnuma as numa(3) requires; returns numa_available()'s result.
+ */
+int
+pg_numa_init(void)
+{
+	return numa_available();
+}
+
+int
+pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
+{
+	return numa_move_pages(pid, count, pages, NULL, status, 0);	/* nodes=NULL, flags=0: presumably query-only */
+}
+
+int
+pg_numa_get_max_node(void)
+{
+	return numa_max_node();		/* libnuma's answer, passed through unchanged */
+}
+
+Size
+pg_numa_get_pagesize(void)
+{
+	Size		pagesz = sysconf(_SC_PAGESIZE);	/* start from the OS page size */
+	if (huge_pages_status == HUGE_PAGES_ON)
+		GetHugePageSize(&pagesz, NULL);	/* presumably stores the huge page size */
+	return pagesz;
+}
+
+#ifndef FRONTEND
+/* Report a libnuma warning via ereport(). FIXME not tested, might crash */
+void
+numa_warn(int num, char *fmt,...)
+{
+	va_list		ap;
+	int			olde = errno;
+	int			needed;
+	StringInfoData msg;
+
+	initStringInfo(&msg);
+
+	/* Format, enlarging the buffer until appendStringInfoVA() reports success */
+	for (;;)
+	{
+		va_start(ap, fmt);
+		needed = appendStringInfoVA(&msg, fmt, ap);
+		va_end(ap);
+		if (needed == 0)
+			break;
+		enlargeStringInfo(&msg, needed);
+	}
+
+	ereport(WARNING,
+			(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+			 errmsg_internal("libnuma: WARNING: %s", msg.data)));
+
+	pfree(msg.data);
+
+	errno = olde;
+}
+
+void
+numa_error(char *where)
+{
+	int			saved_errno = errno;
+
+	/*
+	 * Only a WARNING is raised for now.  XXX: long-term the severity might
+	 * need to depend on numa_set_strict().
+	 */
+	elog(WARNING, "libnuma: ERROR: %s", where);
+	errno = saved_errno;
+}
+#endif							/* FRONTEND */
+
+#else
+
+/* Stub implementations, compiled when USE_LIBNUMA is not defined */
+int
+pg_numa_init(void)
+{
+	/* Report NUMA as unavailable */
+	return -1;
+}
+
+int
+pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
+{
+	return 0;					/* no-op stub: status[] is never written */
+}
+
+int
+pg_numa_get_max_node(void)
+{
+	return 0;					/* stub: always reports 0 */
+}
+
+Size
+pg_numa_get_pagesize(void)
+{
+	Size		pagesz;
+#ifdef WIN32
+	SYSTEM_INFO sysinfo;
+	GetSystemInfo(&sysinfo);	/* Windows: page size comes from SYSTEM_INFO */
+	pagesz = sysinfo.dwPageSize;
+#else
+	pagesz = sysconf(_SC_PAGESIZE);	/* elsewhere: ask sysconf() */
+#endif
+	if (huge_pages_status == HUGE_PAGES_ON)
+		GetHugePageSize(&pagesz, NULL);	/* presumably stores the huge page size */
+	return pagesz;
+}
+
+#endif
-- 
2.39.5

