From d091528cd990ea84980441bf82064d5b884c6786 Mon Sep 17 00:00:00 2001
From: Jakub Wartak <jakub.wartak@enterprisedb.com>
Date: Fri, 21 Feb 2025 10:19:35 +0100
Subject: [PATCH v16 1/4] Add optional dependency to libnuma (Linux-only) for
 basic NUMA awareness routines and add minimal src/port/pg_numa.c portability
 wrapper. Other platforms can be added later.

This also adds function pg_numa_available() that can be used to check if
the server was linked with NUMA support.

libnuma is unavailable on 32-bit builds, so due to lack of i386 shared object,
we disable it there (it does not make sense anyway on i386 it is very memory
limited platform even with PAE)

Author: Jakub Wartak <jakub.wartak@enterprisedb.com>
Co-authored-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CAKZiRmxh6KWo0aqRqvmcoaX2jUxZYb4kGp3N%3Dq1w%2BDiH-696Xw%40mail.gmail.com
---
 .cirrus.tasks.yml                   |  12 +-
 configure                           |  87 ++++++++++++++
 configure.ac                        |  13 +++
 doc/src/sgml/func.sgml              |  13 +++
 doc/src/sgml/installation.sgml      |  21 ++++
 meson.build                         |  23 ++++
 meson_options.txt                   |   3 +
 src/Makefile.global.in              |   1 +
 src/backend/utils/misc/guc_tables.c |   2 +-
 src/include/catalog/pg_proc.dat     |   4 +
 src/include/pg_config.h.in          |   3 +
 src/include/port/pg_numa.h          |  46 ++++++++
 src/include/storage/pg_shmem.h      |   1 +
 src/makefiles/meson.build           |   3 +
 src/port/Makefile                   |   1 +
 src/port/meson.build                |   1 +
 src/port/pg_numa.c                  | 168 ++++++++++++++++++++++++++++
 17 files changed, 397 insertions(+), 5 deletions(-)
 create mode 100644 src/include/port/pg_numa.h
 create mode 100644 src/port/pg_numa.c

diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml
index 86a1fa9bbdb..e6963c774aa 100644
--- a/.cirrus.tasks.yml
+++ b/.cirrus.tasks.yml
@@ -445,8 +445,10 @@ task:
     EOF
 
   setup_additional_packages_script: |
-    #apt-get update
-    #DEBIAN_FRONTEND=noninteractive apt-get -y install ...
+    apt-get update
+    DEBIAN_FRONTEND=noninteractive apt-get -y install \
+      libnuma1 \
+      libnuma-dev
 
   matrix:
     # SPECIAL:
@@ -471,6 +473,7 @@ task:
             --enable-cassert --enable-injection-points --enable-debug \
             --enable-tap-tests --enable-nls \
             --with-segsize-blocks=6 \
+            --with-libnuma \
             --with-liburing \
             \
             ${LINUX_CONFIGURE_FEATURES} \
@@ -523,6 +526,7 @@ task:
             -Dllvm=disabled \
             --pkg-config-path /usr/lib/i386-linux-gnu/pkgconfig/ \
             -DPERL=perl5.36-i386-linux-gnu \
+            -Dlibnuma=disabled \
             build-32
         EOF
 
@@ -839,8 +843,8 @@ task:
     folder: $CCACHE_DIR
 
   setup_additional_packages_script: |
-    #apt-get update
-    #DEBIAN_FRONTEND=noninteractive apt-get -y install ...
+    apt-get update
+    DEBIAN_FRONTEND=noninteractive apt-get -y install libnuma1 libnuma-dev
 
   ###
   # Test that code can be built with gcc/clang without warnings
diff --git a/configure b/configure
index 4dd67a5cc6e..81e43a38331 100755
--- a/configure
+++ b/configure
@@ -711,6 +711,7 @@ with_libxml
 LIBCURL_LIBS
 LIBCURL_CFLAGS
 with_libcurl
+with_libnuma
 with_uuid
 LIBURING_LIBS
 LIBURING_CFLAGS
@@ -872,6 +873,7 @@ with_liburing
 with_uuid
 with_ossp_uuid
 with_libcurl
+with_libnuma
 with_libxml
 with_libxslt
 with_system_tzdata
@@ -1588,6 +1590,7 @@ Optional Packages:
   --with-uuid=LIB         build contrib/uuid-ossp using LIB (bsd,e2fs,ossp)
   --with-ossp-uuid        obsolete spelling of --with-uuid=ossp
   --with-libcurl          build with libcurl support
+  --with-libnuma          build with libnuma support
   --with-libxml           build with XML support
   --with-libxslt          use XSLT support when building contrib/xml2
   --with-system-tzdata=DIR
@@ -9279,6 +9282,33 @@ fi
 
 
 
+#
+# NUMA 
+#
+
+
+
+# Check whether --with-libnuma was given.
+if test "${with_libnuma+set}" = set; then :
+  withval=$with_libnuma;
+  case $withval in
+    yes)
+
+$as_echo "#define USE_LIBNUMA 1" >>confdefs.h
+
+      ;;
+    no)
+      :
+      ;;
+    *)
+      as_fn_error $? "no argument expected for --with-libnuma option" "$LINENO" 5
+      ;;
+  esac
+
+else
+  with_libnuma=no
+
+fi
 
 
 
@@ -12517,6 +12547,63 @@ fi
 
 fi
 
+if test "$with_libnuma" = yes ; then
+
+  ac_fn_c_check_header_mongrel "$LINENO" "numa.h" "ac_cv_header_numa_h" "$ac_includes_default"
+if test "x$ac_cv_header_numa_h" = xyes; then :
+
+else
+  as_fn_error $? "header file <numa.h> is required for --with-libnuma" "$LINENO" 5
+fi
+
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for numa_available in -lnuma" >&5
+$as_echo_n "checking for numa_available in -lnuma... " >&6; }
+if ${ac_cv_lib_numa_numa_available+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lnuma  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char numa_available ();
+int
+main ()
+{
+return numa_available ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_numa_numa_available=yes
+else
+  ac_cv_lib_numa_numa_available=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_numa_numa_available" >&5
+$as_echo "$ac_cv_lib_numa_numa_available" >&6; }
+if test "x$ac_cv_lib_numa_numa_available" = xyes; then :
+
+  LIBS="-lnuma $LIBS"
+
+else
+  as_fn_error $? "library 'numa' does not provide numa_available" "$LINENO" 5
+fi
+
+fi
+
 # XXX libcurl must link after libgssapi_krb5 on FreeBSD to avoid segfaults
 # during gss_acquire_cred(). This is possibly related to Curl's Heimdal
 # dependency on that platform?
diff --git a/configure.ac b/configure.ac
index 537e654e7b3..1879baf183a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1053,6 +1053,19 @@ if test "$with_libcurl" = yes ; then
 fi
 
 
+#
+# libnuma
+#
+AC_MSG_CHECKING([whether to build with libnuma support])
+PGAC_ARG_BOOL(with, libnuma, no, [use libnuma for NUMA awareness],
+              [AC_DEFINE([USE_LIBNUMA], 1, [Define to build with NUMA awareness support. (--with-libnuma)])])
+AC_MSG_RESULT([$with_libnuma])
+AC_SUBST(with_libnuma)
+
+if test "$with_libnuma" = yes ; then
+  AC_CHECK_LIB(numa,    numa_available, [], [AC_MSG_ERROR([library 'libnuma' is required for NUMA awareness])])
+fi
+
 #
 # XML
 #
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5bf6656deca..1f98826d16d 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -25138,6 +25138,19 @@ SELECT * FROM pg_ls_dir('.') WITH ORDINALITY AS t(ls,n);
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>pg_numa_available</primary>
+        </indexterm>
+        <function>pg_numa_available</function> ()
+        <returnvalue>boolean</returnvalue>
+       </para>
+       <para>
+        Returns true if the server has been compiled with <acronym>NUMA</acronym> support.
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index cc28f041330..5f0486bb335 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -1156,6 +1156,16 @@ build-postgresql:
        </listitem>
       </varlistentry>
 
+      <varlistentry id="configure-option-with-libnuma">
+       <term><option>--with-libnuma</option></term>
+       <listitem>
+        <para>
+         Build with libnuma support for basic NUMA support.
+         Only supported on platforms for which the libnuma library is implemented.
+        </para>
+       </listitem>
+      </varlistentry>
+
       <varlistentry id="configure-option-with-liburing">
        <term><option>--with-liburing</option></term>
        <listitem>
@@ -2645,6 +2655,17 @@ ninja install
       </listitem>
      </varlistentry>
 
+     <varlistentry id="configure-with-libnuma-meson">
+      <term><option>-Dlibnuma={ auto | enabled | disabled }</option></term>
+      <listitem>
+       <para>
+        Build with libnuma support for basic NUMA support.
+        Only supported on platforms for which the libnuma library is implemented.
+        The default for this option is auto.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="configure-with-libxml-meson">
       <term><option>-Dlibxml={ auto | enabled | disabled }</option></term>
       <listitem>
diff --git a/meson.build b/meson.build
index 187f1787a3c..52c4f3c1022 100644
--- a/meson.build
+++ b/meson.build
@@ -943,6 +943,27 @@ else
 endif
 
 
+###############################################################
+# Library: libnuma
+###############################################################
+
+libnumaopt = get_option('libnuma')
+if not libnumaopt.disabled()
+  # via pkg-config
+  libnuma = dependency('numa', required: libnumaopt)
+  if not libnuma.found()
+    libnuma = cc.find_library('numa', required: libnumaopt)
+  endif
+  if not cc.has_header('numa.h', dependencies: libnuma, required: libnumaopt)
+    libnuma = not_found_dep
+  endif
+  if libnuma.found()
+    cdata.set('USE_LIBNUMA', 1)
+  endif
+else
+  libnuma = not_found_dep
+endif
+
 
 ###############################################################
 # Library: liburing
@@ -3177,6 +3198,7 @@ backend_both_deps += [
   icu_i18n,
   ldap,
   libintl,
+  libnuma,
   liburing,
   libxml,
   lz4,
@@ -3833,6 +3855,7 @@ if meson.version().version_compare('>=0.57')
       'icu': icu,
       'ldap': ldap,
       'libcurl': libcurl,
+      'libnuma': libnuma,
       'liburing': liburing,
       'libxml': libxml,
       'libxslt': libxslt,
diff --git a/meson_options.txt b/meson_options.txt
index dd7126da3a7..8675e1b5d87 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -106,6 +106,9 @@ option('libcurl', type : 'feature', value: 'auto',
 option('libedit_preferred', type: 'boolean', value: false,
   description: 'Prefer BSD Libedit over GNU Readline')
 
+option('libnuma', type: 'feature', value: 'auto',
+  description: 'NUMA awareness support')
+
 option('liburing', type : 'feature', value: 'auto',
   description: 'io_uring support, for asynchronous I/O')
 
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index cce29a37ac5..71479ad9018 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -196,6 +196,7 @@ with_gssapi	= @with_gssapi@
 with_krb_srvnam	= @with_krb_srvnam@
 with_ldap	= @with_ldap@
 with_libcurl	= @with_libcurl@
+with_libnuma	= @with_libnuma@
 with_liburing	= @with_liburing@
 with_libxml	= @with_libxml@
 with_libxslt	= @with_libxslt@
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 989825d3a9c..a80616d4455 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -566,7 +566,7 @@ static int	ssl_renegotiation_limit;
  */
 int			huge_pages = HUGE_PAGES_TRY;
 int			huge_page_size;
-static int	huge_pages_status = HUGE_PAGES_UNKNOWN;
+int			huge_pages_status = HUGE_PAGES_UNKNOWN;
 
 /*
  * These variables are all dummies that don't do anything, except in some
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 8b68b16d79d..d532b8c43b9 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -8506,6 +8506,10 @@
   proargnames => '{name,off,size,allocated_size}',
   prosrc => 'pg_get_shmem_allocations' },
 
+{ oid => '9685', descr => 'Is NUMA compilation available?',
+  proname => 'pg_numa_available', provolatile => 'v', prorettype => 'bool',
+  proargtypes => '', prosrc => 'pg_numa_available' },
+
 # memory context of local backend
 { oid => '2282',
   descr => 'information about all memory contexts of local backend',
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index c6f055b3905..424d42b14f8 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -675,6 +675,9 @@
 /* Define to 1 to build with libcurl support. (--with-libcurl) */
 #undef USE_LIBCURL
 
+/* Define to 1 to build with NUMA awareness support. (--with-libnuma) */
+#undef USE_LIBNUMA
+
 /* Define to build with io_uring support. (--with-liburing) */
 #undef USE_LIBURING
 
diff --git a/src/include/port/pg_numa.h b/src/include/port/pg_numa.h
new file mode 100644
index 00000000000..986152e0942
--- /dev/null
+++ b/src/include/port/pg_numa.h
@@ -0,0 +1,46 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_numa.h
+ *	  Basic NUMA portability routines
+ *
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * 	src/include/port/pg_numa.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_NUMA_H
+#define PG_NUMA_H
+
+#include "c.h"
+#include "postgres.h"
+#include "fmgr.h"
+
+extern PGDLLIMPORT int pg_numa_init(void);
+extern PGDLLIMPORT int pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status);
+extern PGDLLIMPORT int pg_numa_get_max_node(void);
+extern PGDLLIMPORT Size pg_numa_get_pagesize(void);
+extern PGDLLIMPORT Datum pg_numa_available(PG_FUNCTION_ARGS);
+
+#ifdef USE_LIBNUMA
+
+/*
+ * This is required on Linux, before pg_numa_query_pages() as we
+ * need to page-fault before move_pages(2) syscall returns valid results.
+ */
+#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \
+	ro_volatile_var = *(uint64 *)ptr
+
+extern void numa_warn(int num, char *fmt,...) pg_attribute_printf(2, 3);
+extern void numa_error(char *where);
+
+#else
+
+#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \
+	do {} while(0)
+
+#endif
+
+#endif							/* PG_NUMA_H */
diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h
index b99ebc9e86f..5f7d4b83a60 100644
--- a/src/include/storage/pg_shmem.h
+++ b/src/include/storage/pg_shmem.h
@@ -45,6 +45,7 @@ typedef struct PGShmemHeader	/* standard header for all Postgres shmem */
 extern PGDLLIMPORT int shared_memory_type;
 extern PGDLLIMPORT int huge_pages;
 extern PGDLLIMPORT int huge_page_size;
+extern PGDLLIMPORT int huge_pages_status;
 
 /* Possible values for huge_pages and huge_pages_status */
 typedef enum
diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build
index 46d8da070e8..55da678ec27 100644
--- a/src/makefiles/meson.build
+++ b/src/makefiles/meson.build
@@ -200,6 +200,8 @@ pgxs_empty = [
 
   'ICU_LIBS',
 
+  'LIBNUMA_CFLAGS', 'LIBNUMA_LIBS',
+
   'LIBURING_CFLAGS', 'LIBURING_LIBS',
 ]
 
@@ -232,6 +234,7 @@ pgxs_deps = {
   'icu': icu,
   'ldap': ldap,
   'libcurl': libcurl,
+  'libnuma': libnuma,
   'liburing': liburing,
   'libxml': libxml,
   'libxslt': libxslt,
diff --git a/src/port/Makefile b/src/port/Makefile
index 7843d7b67cb..8c8e6b92910 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -45,6 +45,7 @@ OBJS = \
 	path.o \
 	pg_bitutils.o \
 	pg_localeconv_r.o \
+	pg_numa.o \
 	pg_popcount_avx512.o \
 	pg_strong_random.o \
 	pgcheckdir.o \
diff --git a/src/port/meson.build b/src/port/meson.build
index 653539ba5b3..1eb8e38d047 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -8,6 +8,7 @@ pgport_sources = [
   'path.c',
   'pg_bitutils.c',
   'pg_localeconv_r.c',
+  'pg_numa.c',
   'pg_popcount_avx512.c',
   'pg_strong_random.c',
   'pgcheckdir.c',
diff --git a/src/port/pg_numa.c b/src/port/pg_numa.c
new file mode 100644
index 00000000000..7d905ef31f5
--- /dev/null
+++ b/src/port/pg_numa.c
@@ -0,0 +1,168 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_numa.c
+ * 		Basic NUMA portability routines
+ *
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_numa.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include <unistd.h>
+
+#ifdef WIN32
+#include <windows.h>
+#endif
+
+#include "fmgr.h"
+#include "port/pg_numa.h"
+#include "storage/pg_shmem.h"
+
+/*
+ * At this point we provide support only for Linux thanks to libnuma, but in
+ * future support for other platforms e.g. Win32 or FreeBSD might be possible
+ * too. For Win32 NUMA APIs see
+ * https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support
+ */
+#ifdef USE_LIBNUMA
+
+#include <numa.h>
+#include <numaif.h>
+
+/* libnuma requires initialization as per numa(3) on Linux */
+int
+pg_numa_init(void)
+{
+	int			r = numa_available();
+
+	return r;
+}
+
+int
+pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
+{
+	return numa_move_pages(pid, count, pages, NULL, status, 0);
+}
+
+int
+pg_numa_get_max_node(void)
+{
+	return numa_max_node();
+}
+
+Size
+pg_numa_get_pagesize(void)
+{
+	Size		os_page_size = sysconf(_SC_PAGESIZE);
+
+	if (huge_pages_status == HUGE_PAGES_ON)
+		GetHugePageSize(&os_page_size, NULL);
+
+	return os_page_size;
+}
+
+#ifndef FRONTEND
+/*
+ * XXX: not really tested as there is no way to trigger this in our
+ * current usage of libnuma.
+ *
+ * The libnuma built-in code can be seen here:
+ * https://github.com/numactl/numactl/blob/master/libnuma.c
+ *
+ */
+void
+numa_warn(int num, char *fmt,...)
+{
+	va_list		ap;
+	int			olde = errno;
+	int			needed;
+	StringInfoData msg;
+
+	initStringInfo(&msg);
+
+	va_start(ap, fmt);
+	needed = appendStringInfoVA(&msg, fmt, ap);
+	va_end(ap);
+	if (needed > 0)
+	{
+		enlargeStringInfo(&msg, needed);
+		va_start(ap, fmt);
+		appendStringInfoVA(&msg, fmt, ap);
+		va_end(ap);
+	}
+
+	ereport(WARNING,
+			(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+			 errmsg_internal("libnuma: WARNING: %s", msg.data)));
+
+	pfree(msg.data);
+
+	errno = olde;
+}
+
+void
+numa_error(char *where)
+{
+	int			olde = errno;
+
+	/*
+	 * XXX: for now we issue just WARNING, but long-term that might depend on
+	 * numa_set_strict() here.
+	 */
+	elog(WARNING, "libnuma: ERROR: %s", where);
+	errno = olde;
+}
+#endif							/* FRONTEND */
+
+#else
+
+/* Empty wrappers */
+int
+pg_numa_init(void)
+{
+	/* We state that NUMA is not available */
+	return -1;
+}
+
+int
+pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
+{
+	return 0;
+}
+
+int
+pg_numa_get_max_node(void)
+{
+	return 0;
+}
+
+Size
+pg_numa_get_pagesize(void)
+{
+#ifndef WIN32
+	Size		os_page_size = sysconf(_SC_PAGESIZE);
+#else
+	Size		os_page_size;
+	SYSTEM_INFO sysinfo;
+
+	GetSystemInfo(&sysinfo);
+	os_page_size = sysinfo.dwPageSize;
+#endif
+	if (huge_pages_status == HUGE_PAGES_ON)
+		GetHugePageSize(&os_page_size, NULL);
+	return os_page_size;
+}
+
+#endif
+
+Datum
+pg_numa_available(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_BOOL(pg_numa_init() != -1);
+}
-- 
2.39.5

