From 244d6e9c95adf8857219bbfefefe76c07addde66 Mon Sep 17 00:00:00 2001
From: Jakub Wartak <jakub.wartak@enterprisedb.com>
Date: Fri, 21 Feb 2025 10:19:35 +0100
Subject: [PATCH v15 1/4] Add optional dependency to libnuma (Linux-only) for
 basic NUMA awareness routines and add minimal src/port/pg_numa.c portability
 wrapper. Other platforms can be added later.

This also adds function pg_numa_available() that can be used to check if
the server was linked with NUMA support.

libnuma is unavailable on 32-bit builds due to the lack of an i386 shared
object, so we disable it there (it would make little sense on i386 anyway, as
that is a very memory-limited platform even with PAE).

Author: Jakub Wartak <jakub.wartak@enterprisedb.com>
Co-authored-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CAKZiRmxh6KWo0aqRqvmcoaX2jUxZYb4kGp3N%3Dq1w%2BDiH-696Xw%40mail.gmail.com
---
 .cirrus.tasks.yml                   |  12 +-
 configure                           |  87 ++++++++++++++
 configure.ac                        |  13 +++
 doc/src/sgml/func.sgml              |  13 +++
 doc/src/sgml/installation.sgml      |  21 ++++
 meson.build                         |  23 ++++
 meson_options.txt                   |   3 +
 src/Makefile.global.in              |   1 +
 src/backend/utils/misc/guc_tables.c |   2 +-
 src/include/catalog/pg_proc.dat     |   4 +
 src/include/pg_config.h.in          |   3 +
 src/include/port/pg_numa.h          |  46 ++++++++
 src/include/storage/pg_shmem.h      |   1 +
 src/makefiles/meson.build           |   3 +
 src/port/Makefile                   |   1 +
 src/port/meson.build                |   1 +
 src/port/pg_numa.c                  | 168 ++++++++++++++++++++++++++++
 17 files changed, 397 insertions(+), 5 deletions(-)
 create mode 100644 src/include/port/pg_numa.h
 create mode 100644 src/port/pg_numa.c

diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml
index 5849cbb839a..7010dff7aef 100644
--- a/.cirrus.tasks.yml
+++ b/.cirrus.tasks.yml
@@ -445,8 +445,10 @@ task:
     EOF
 
   setup_additional_packages_script: |
-    #apt-get update
-    #DEBIAN_FRONTEND=noninteractive apt-get -y install ...
+    apt-get update
+    DEBIAN_FRONTEND=noninteractive apt-get -y install \
+      libnuma1 \
+      libnuma-dev
 
   matrix:
     # SPECIAL:
@@ -471,6 +473,7 @@ task:
             --enable-cassert --enable-injection-points --enable-debug \
             --enable-tap-tests --enable-nls \
             --with-segsize-blocks=6 \
+            --with-libnuma \
             \
             ${LINUX_CONFIGURE_FEATURES} \
             \
@@ -519,6 +522,7 @@ task:
             -Dllvm=disabled \
             --pkg-config-path /usr/lib/i386-linux-gnu/pkgconfig/ \
             -DPERL=perl5.36-i386-linux-gnu \
+            -Dlibnuma=disabled \
             build-32
         EOF
 
@@ -835,8 +839,8 @@ task:
     folder: $CCACHE_DIR
 
   setup_additional_packages_script: |
-    #apt-get update
-    #DEBIAN_FRONTEND=noninteractive apt-get -y install ...
+    apt-get update
+    DEBIAN_FRONTEND=noninteractive apt-get -y install libnuma1 libnuma-dev
 
   ###
   # Test that code can be built with gcc/clang without warnings
diff --git a/configure b/configure
index 559f535f5cd..0931331f627 100755
--- a/configure
+++ b/configure
@@ -711,6 +711,7 @@ with_libxml
 LIBCURL_LIBS
 LIBCURL_CFLAGS
 with_libcurl
+with_libnuma
 with_uuid
 with_readline
 with_systemd
@@ -868,6 +869,7 @@ with_libedit_preferred
 with_uuid
 with_ossp_uuid
 with_libcurl
+with_libnuma
 with_libxml
 with_libxslt
 with_system_tzdata
@@ -1581,6 +1583,7 @@ Optional Packages:
   --with-uuid=LIB         build contrib/uuid-ossp using LIB (bsd,e2fs,ossp)
   --with-ossp-uuid        obsolete spelling of --with-uuid=ossp
   --with-libcurl          build with libcurl support
+  --with-libnuma          build with libnuma support
   --with-libxml           build with XML support
   --with-libxslt          use XSLT support when building contrib/xml2
   --with-system-tzdata=DIR
@@ -9140,6 +9143,33 @@ fi
 
 
 
+#
+# NUMA 
+#
+
+
+
+# Check whether --with-libnuma was given.
+if test "${with_libnuma+set}" = set; then :
+  withval=$with_libnuma;
+  case $withval in
+    yes)
+
+$as_echo "#define USE_LIBNUMA 1" >>confdefs.h
+
+      ;;
+    no)
+      :
+      ;;
+    *)
+      as_fn_error $? "no argument expected for --with-libnuma option" "$LINENO" 5
+      ;;
+  esac
+
+else
+  with_libnuma=no
+
+fi
 
 
 
@@ -12378,6 +12408,63 @@ fi
 
 fi
 
+if test "$with_libnuma" = yes ; then
+
+  ac_fn_c_check_header_mongrel "$LINENO" "numa.h" "ac_cv_header_numa_h" "$ac_includes_default"
+if test "x$ac_cv_header_numa_h" = xyes; then :
+
+else
+  as_fn_error $? "header file <numa.h> is required for --with-libnuma" "$LINENO" 5
+fi
+
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for numa_available in -lnuma" >&5
+$as_echo_n "checking for numa_available in -lnuma... " >&6; }
+if ${ac_cv_lib_numa_numa_available+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lnuma  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char numa_available ();
+int
+main ()
+{
+return numa_available ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_numa_numa_available=yes
+else
+  ac_cv_lib_numa_numa_available=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_numa_numa_available" >&5
+$as_echo "$ac_cv_lib_numa_numa_available" >&6; }
+if test "x$ac_cv_lib_numa_numa_available" = xyes; then :
+
+  LIBS="-lnuma $LIBS"
+
+else
+  as_fn_error $? "library 'numa' does not provide numa_available" "$LINENO" 5
+fi
+
+fi
+
 # XXX libcurl must link after libgssapi_krb5 on FreeBSD to avoid segfaults
 # during gss_acquire_cred(). This is possibly related to Curl's Heimdal
 # dependency on that platform?
diff --git a/configure.ac b/configure.ac
index b6d02f5ecc7..1a394dfc077 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1041,6 +1041,19 @@ if test "$with_libcurl" = yes ; then
 fi
 
 
+#
+# libnuma
+#
+AC_MSG_CHECKING([whether to build with libnuma support])
+PGAC_ARG_BOOL(with, libnuma, no, [build with libnuma support],
+              [AC_DEFINE([USE_LIBNUMA], 1, [Define to 1 to build with NUMA awareness support. (--with-libnuma)])])
+AC_MSG_RESULT([$with_libnuma])
+AC_SUBST(with_libnuma)
+
+if test "$with_libnuma" = yes ; then
+  AC_CHECK_LIB(numa, numa_available, [], [AC_MSG_ERROR([library 'libnuma' is required for NUMA awareness])])
+fi
+
 #
 # XML
 #
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 2ab5661602c..d7b33c67ec6 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -25078,6 +25078,19 @@ SELECT * FROM pg_ls_dir('.') WITH ORDINALITY AS t(ls,n);
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>pg_numa_available</primary>
+        </indexterm>
+        <function>pg_numa_available</function> ()
+        <returnvalue>boolean</returnvalue>
+       </para>
+       <para>
+        Returns true if the server has been compiled with <acronym>NUMA</acronym> support and libnuma reports <acronym>NUMA</acronym> as available on the running system.
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index e076cefa3b9..9f56205a1d7 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -1156,6 +1156,16 @@ build-postgresql:
        </listitem>
       </varlistentry>
 
+      <varlistentry id="configure-option-with-libnuma">
+       <term><option>--with-libnuma</option></term>
+       <listitem>
+        <para>
+         Build with libnuma to enable basic <acronym>NUMA</acronym> support.
+         Only supported on platforms for which the libnuma library is implemented.
+        </para>
+       </listitem>
+      </varlistentry>
+
       <varlistentry id="configure-option-with-libxml">
        <term><option>--with-libxml</option></term>
        <listitem>
@@ -2611,6 +2621,17 @@ ninja install
       </listitem>
      </varlistentry>
 
+     <varlistentry id="configure-with-libnuma-meson">
+      <term><option>-Dlibnuma={ auto | enabled | disabled }</option></term>
+      <listitem>
+       <para>
+        Build with libnuma to enable basic <acronym>NUMA</acronym> support.
+        Only supported on platforms for which the libnuma library is implemented.
+        The default for this option is auto.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="configure-with-libxml-meson">
       <term><option>-Dlibxml={ auto | enabled | disabled }</option></term>
       <listitem>
diff --git a/meson.build b/meson.build
index b6daa5b7040..b4cb6929bfc 100644
--- a/meson.build
+++ b/meson.build
@@ -943,6 +943,27 @@ else
 endif
 
 
+###############################################################
+# Library: libnuma
+###############################################################
+
+libnumaopt = get_option('libnuma')
+if not libnumaopt.disabled()
+  # pkg-config first, never required, so find_library() below can still run
+  libnuma = dependency('numa', required: false)
+  if not libnuma.found()
+    libnuma = cc.find_library('numa', required: libnumaopt)
+  endif
+  if not cc.has_header('numa.h', dependencies: libnuma, required: libnumaopt)
+    libnuma = not_found_dep
+  endif
+  if libnuma.found()
+    cdata.set('USE_LIBNUMA', 1)
+  endif
+else
+  libnuma = not_found_dep
+endif
+
 
 ###############################################################
 # Library: libxml
@@ -3162,6 +3183,7 @@ backend_both_deps += [
   icu_i18n,
   ldap,
   libintl,
+  libnuma,
   libxml,
   lz4,
   pam,
@@ -3817,6 +3839,7 @@ if meson.version().version_compare('>=0.57')
       'icu': icu,
       'ldap': ldap,
       'libcurl': libcurl,
+      'libnuma': libnuma,
       'libxml': libxml,
       'libxslt': libxslt,
       'llvm': llvm,
diff --git a/meson_options.txt b/meson_options.txt
index 702c4517145..adaadb5faf1 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -106,6 +106,9 @@ option('libcurl', type : 'feature', value: 'auto',
 option('libedit_preferred', type: 'boolean', value: false,
   description: 'Prefer BSD Libedit over GNU Readline')
 
+option('libnuma', type: 'feature', value: 'auto',
+  description: 'NUMA awareness support')
+
 option('libxml', type: 'feature', value: 'auto',
   description: 'XML support')
 
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 8fe9d61e82a..7ff45cf86e7 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -196,6 +196,7 @@ with_gssapi	= @with_gssapi@
 with_krb_srvnam	= @with_krb_srvnam@
 with_ldap	= @with_ldap@
 with_libcurl	= @with_libcurl@
+with_libnuma	= @with_libnuma@
 with_libxml	= @with_libxml@
 with_libxslt	= @with_libxslt@
 with_llvm	= @with_llvm@
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index cc8f2b1230a..ae5452d9539 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -566,7 +566,7 @@ static int	ssl_renegotiation_limit;
  */
 int			huge_pages = HUGE_PAGES_TRY;
 int			huge_page_size;
-static int	huge_pages_status = HUGE_PAGES_UNKNOWN;
+int			huge_pages_status = HUGE_PAGES_UNKNOWN;
 
 /*
  * These variables are all dummies that don't do anything, except in some
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 890822eaf79..85902903653 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -8492,6 +8492,10 @@
   proargnames => '{name,off,size,allocated_size}',
   prosrc => 'pg_get_shmem_allocations' },
 
+{ oid => '9685', descr => 'Is NUMA support available?',
+  proname => 'pg_numa_available', provolatile => 'v', prorettype => 'bool',
+  proargtypes => '', prosrc => 'pg_numa_available' },
+
 # memory context of local backend
 { oid => '2282',
   descr => 'information about all memory contexts of local backend',
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index db6454090d2..8894f800607 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -672,6 +672,9 @@
 /* Define to 1 to build with libcurl support. (--with-libcurl) */
 #undef USE_LIBCURL
 
+/* Define to 1 to build with NUMA awareness support. (--with-libnuma) */
+#undef USE_LIBNUMA
+
 /* Define to 1 to build with XML support. (--with-libxml) */
 #undef USE_LIBXML
 
diff --git a/src/include/port/pg_numa.h b/src/include/port/pg_numa.h
new file mode 100644
index 00000000000..986152e0942
--- /dev/null
+++ b/src/include/port/pg_numa.h
@@ -0,0 +1,46 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_numa.h
+ *	  Basic NUMA portability routines
+ *
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * 	src/include/port/pg_numa.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_NUMA_H
+#define PG_NUMA_H
+
+#include "c.h"
+#include "postgres.h"
+#include "fmgr.h"
+
+extern PGDLLIMPORT int pg_numa_init(void);
+extern PGDLLIMPORT int pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status);
+extern PGDLLIMPORT int pg_numa_get_max_node(void);
+extern PGDLLIMPORT Size pg_numa_get_pagesize(void);
+extern PGDLLIMPORT Datum pg_numa_available(PG_FUNCTION_ARGS);
+
+#ifdef USE_LIBNUMA
+
+/*
+ * Read one uint64 through ptr so that the page gets faulted in.  On Linux this
+ * is required before pg_numa_query_pages(), as move_pages(2) needs it.
+ */
+#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \
+	ro_volatile_var = *(volatile uint64 *) (ptr)
+
+extern void numa_warn(int num, char *fmt,...) pg_attribute_printf(2, 3);
+extern void numa_error(char *where);
+
+#else
+
+#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \
+	do {} while(0)
+
+#endif
+
+#endif							/* PG_NUMA_H */
diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h
index b99ebc9e86f..5f7d4b83a60 100644
--- a/src/include/storage/pg_shmem.h
+++ b/src/include/storage/pg_shmem.h
@@ -45,6 +45,7 @@ typedef struct PGShmemHeader	/* standard header for all Postgres shmem */
 extern PGDLLIMPORT int shared_memory_type;
 extern PGDLLIMPORT int huge_pages;
 extern PGDLLIMPORT int huge_page_size;
+extern PGDLLIMPORT int huge_pages_status;
 
 /* Possible values for huge_pages and huge_pages_status */
 typedef enum
diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build
index 60e13d50235..f786c191605 100644
--- a/src/makefiles/meson.build
+++ b/src/makefiles/meson.build
@@ -199,6 +199,8 @@ pgxs_empty = [
   'PTHREAD_CFLAGS', 'PTHREAD_LIBS',
 
   'ICU_LIBS',
+
+  'LIBNUMA_CFLAGS', 'LIBNUMA_LIBS'
 ]
 
 if host_system == 'windows' and cc.get_argument_syntax() != 'msvc'
@@ -230,6 +232,7 @@ pgxs_deps = {
   'icu': icu,
   'ldap': ldap,
   'libcurl': libcurl,
+  'libnuma': libnuma,
   'libxml': libxml,
   'libxslt': libxslt,
   'llvm': llvm,
diff --git a/src/port/Makefile b/src/port/Makefile
index 4c224319512..a68a29d5414 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -44,6 +44,7 @@ OBJS = \
 	noblock.o \
 	path.o \
 	pg_bitutils.o \
+	pg_numa.o \
 	pg_popcount_avx512.o \
 	pg_strong_random.o \
 	pgcheckdir.o \
diff --git a/src/port/meson.build b/src/port/meson.build
index 7fcfa728d43..7ffbd4d88d2 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -7,6 +7,7 @@ pgport_sources = [
   'noblock.c',
   'path.c',
   'pg_bitutils.c',
+  'pg_numa.c',
   'pg_popcount_avx512.c',
   'pg_strong_random.c',
   'pgcheckdir.c',
diff --git a/src/port/pg_numa.c b/src/port/pg_numa.c
new file mode 100644
index 00000000000..7d905ef31f5
--- /dev/null
+++ b/src/port/pg_numa.c
@@ -0,0 +1,168 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_numa.c
+ * 		Basic NUMA portability routines
+ *
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_numa.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include <unistd.h>
+
+#ifdef WIN32
+#include <windows.h>
+#endif
+
+#include "fmgr.h"
+#include "port/pg_numa.h"
+#include "storage/pg_shmem.h"
+
+/*
+ * At this point we provide support only for Linux thanks to libnuma, but in
+ * future support for other platforms e.g. Win32 or FreeBSD might be possible
+ * too. For Win32 NUMA APIs see
+ * https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support
+ */
+#ifdef USE_LIBNUMA
+
+#include <numa.h>
+#include <numaif.h>
+
+/* Per numa(3), libnuma must be initialized first; returns numa_available() */
+int
+pg_numa_init(void)
+{
+	return numa_available();
+}
+
+/* Wraps numa_move_pages(), always passing nodes = NULL and flags = 0 */
+int
+pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
+{
+	return numa_move_pages(pid, count, pages, NULL, status, 0);
+}
+
+/* Wraps numa_max_node() */
+int
+pg_numa_get_max_node(void)
+{
+	return numa_max_node();
+}
+
+Size
+pg_numa_get_pagesize(void)
+{
+	Size		pagesz = sysconf(_SC_PAGESIZE);
+
+	/* With huge pages on, GetHugePageSize() gets the chance to replace it */
+	if (huge_pages_status == HUGE_PAGES_ON)
+		GetHugePageSize(&pagesz, NULL);
+	return pagesz;
+}
+
+#ifndef FRONTEND
+/*
+ * Replacement for libnuma's built-in numa_warn() (see
+ * https://github.com/numactl/numactl/blob/master/libnuma.c): format the
+ * message and emit it as a WARNING via ereport(), preserving errno.
+ *
+ * XXX: not really tested, as there is no way to trigger this in our
+ * current usage of libnuma.
+ */
+void
+numa_warn(int num, char *fmt,...)
+{
+	va_list		ap;
+	int			olde = errno;
+	int			needed;
+	StringInfoData msg;
+
+	initStringInfo(&msg);
+
+	/* appendStringInfoVA() may need several tries; loop until it fits */
+	for (;;)
+	{
+		va_start(ap, fmt);
+		needed = appendStringInfoVA(&msg, fmt, ap);
+		va_end(ap);
+		if (needed == 0)
+			break;
+		enlargeStringInfo(&msg, needed);
+	}
+
+	ereport(WARNING,
+			(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+			 errmsg_internal("libnuma: WARNING: %s", msg.data)));
+
+	pfree(msg.data);
+
+	errno = olde;
+}
+
+void
+numa_error(char *where)
+{
+	int			olde = errno;	/* restored below, after logging */
+
+	/*
+	 * XXX: for now we only issue a WARNING; long-term the severity might
+	 * depend on numa_set_strict().
+	 */
+	elog(WARNING, "libnuma: ERROR: %s", where);
+	errno = olde;
+}
+#endif							/* FRONTEND */
+
+#else
+
+/* Stub implementations used when built without libnuma (USE_LIBNUMA unset) */
+int
+pg_numa_init(void)
+{
+	/* Report NUMA as unavailable; pg_numa_available() tests for this -1 */
+	return -1;
+}
+
+int
+pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
+{
+	return 0;					/* no-op: status[] is not written */
+}
+
+int
+pg_numa_get_max_node(void)
+{
+	return 0;					/* without libnuma, always 0 */
+}
+
+Size
+pg_numa_get_pagesize(void)
+{
+	Size		pagesz;
+#ifdef WIN32
+	SYSTEM_INFO si;
+
+	GetSystemInfo(&si);
+	pagesz = si.dwPageSize;
+#else
+	pagesz = sysconf(_SC_PAGESIZE);
+#endif
+	if (huge_pages_status == HUGE_PAGES_ON)
+		GetHugePageSize(&pagesz, NULL);
+	return pagesz;
+}
+
+#endif
+
+Datum
+pg_numa_available(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_BOOL(pg_numa_init() != -1);	/* -1: NUMA unavailable */
+}
-- 
2.39.5

