This is related to my effort to remove the global LC_CTYPE dependency,
and set the global LC_CTYPE to C.
The replacement of "%m" (e.g. with "Permission denied" if
errno==EACCES) in a message is done using strerror_r(), which sometimes
does translation. If it does translate, strerror uses LC_CTYPE to
determine the target encoding, and LC_MESSAGES to determine the
language/region. (It appears that strerror translation only happens on
Linux -- correct me if I'm wrong.)
Currently, strerror translation is orthogonal to our NLS system which
translates Postgres messages (e.g. "division by zero") using gettext
along with our own translations (.po files). The Postgres messages
might be translated but not the "%m" replacements, or vice-versa,
depending on whether NLS is enabled, the OS, etc.
The attached patch changes "%m" replacements to use gettext for
translation. That makes the overall translations more consistent,
equally available on all platforms, and not dependent on LC_CTYPE
(because gettext allows its output encoding to be set separately
with bind_textdomain_codeset()).
It also fixes an issue with translations when LC_CTYPE=C, where
strerror can't find the target encoding, so it forces the translated
message into ASCII even if the database encoding supports all of the
resulting characters. For instance, if LC_CTYPE=C and
LC_MESSAGES=fr_FR.UTF-8 and errno=EACCES and the database encoding is
UTF-8, you get:
Permission non accord?e
instead of:
Permission non accordée
I also attached a C file for testing, which generates the messages and
translations for a range of errnos, and outputs in .po format. As
mentioned earlier, I think the only OS that does any translation of
these messages is Linux, but corrections are welcome.
One downside is that there are more messages to translate -- one per
errno that Postgres might plausibly encounter, plus a few more for
variations between platforms.
Comments?
Regards,
Jeff Davis
From 073bd5416b4c31bbe08975a6863ae8a80d0542da Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Wed, 22 Oct 2025 10:49:59 -0700
Subject: [PATCH v1] NLS: use gettext() to translate system error messages.
Previously, errors from the system such as "Permission denied"
(EACCES) relied on strerror_r() to perform translation, which behaves
differently from gettext(), the mechanism used to translate Postgres
error messages like "division by zero".
Disable translations inside of strerror_r() by temporarily switching
to the C locale, and instead perform the translations with gettext.
This makes translation of system error messages consistent across
platforms, respecting whether NLS is enabled or not. It also avoids
strerror_r()'s dependence on the global LC_CTYPE setting (gettext does
not rely on LC_CTYPE).
Creates a need to translate more messages -- one for each errno that
Postgres might plausibly encounter, or possibly a few more for
platform variations of the string representations.
---
src/port/snprintf.c | 73 +++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 71 insertions(+), 2 deletions(-)
diff --git a/src/port/snprintf.c b/src/port/snprintf.c
index d7f18b42d19..ace39f2673e 100644
--- a/src/port/snprintf.c
+++ b/src/port/snprintf.c
@@ -33,6 +33,9 @@
#include "c.h"
+#ifdef HAVE_USELOCALE
+#include <locale.h>
+#endif
#include <math.h>
/*
@@ -161,6 +164,8 @@ typedef union
static void flushbuffer(PrintfTarget *target);
static void dopr(PrintfTarget *target, const char *format, va_list args);
+static char *c_strerror_r(int errnum, char *buf, size_t buflen);
+static char *nls_strerror_r(int errnum, char *buf, size_t buflen);
/*
@@ -724,8 +729,8 @@ nextch2:
case 'm':
{
char errbuf[PG_STRERROR_R_BUFLEN];
- const char *errm = strerror_r(save_errno,
- errbuf, sizeof(errbuf));
+ const char *errm = nls_strerror_r(save_errno,
+ errbuf, sizeof(errbuf));
dostr(errm, strlen(errm), target);
}
@@ -1566,3 +1571,67 @@ trailing_pad(int padlen, PrintfTarget *target)
if (padlen < 0)
dopr_outchmulti(' ', -padlen, target);
}
+
+
+/*
+ * If NLS is enabled, translate the system error message. Otherwise, return
+ * the untranslated string.
+ */
+static char *
+nls_strerror_r(int errnum, char *buf, size_t buflen)
+{
+#ifdef ENABLE_NLS
+ char plain[PG_STRERROR_R_BUFLEN];
+ char *msgid;
+ char *msgstr;
+
+ /* run c_strerror_r to get plain untranslated string */
+ msgid = c_strerror_r(errnum, plain, PG_STRERROR_R_BUFLEN);
+
+ /* translate with gettext() and store in result buffer */
+ msgstr = _(msgid);
+ strlcpy(buf, msgstr, buflen);
+ return buf;
+#else
+ return c_strerror_r(errnum, buf, buflen);
+#endif
+}
+
+/*
+ * Temporarily switches to the C locale to ensure that strerror_r() returns an
+ * untranslated string.
+ *
+ * The purpose of this function is to avoid strerror_r() performing the
+ * translation itself, which has different behavior than gettext. In
+ * particular, strerror_r() may force the translated message into the ASCII
+ * character set if LC_CTYPE=C, even if the database encoding supports a wider
+ * character set (e.g. UTF-8). We also want to avoid translations when NLS is
+ * disabled.
+ */
+static char *
+c_strerror_r(int errnum, char *buf, size_t buflen)
+{
+#ifdef HAVE_USELOCALE
+ static locale_t c_locale = NULL;
+ char *msgid;
+ locale_t save_loc;
+
+ if (!c_locale)
+ c_locale = newlocale(LC_ALL_MASK, "C", NULL);
+
+ save_loc = uselocale(c_locale);
+
+ msgid = strerror_r(errnum, buf, buflen);
+
+ if (save_loc != NULL)
+ uselocale(save_loc);
+
+ return msgid;
+#else
+ /*
+ * Platforms lacking uselocale() have not been observed to translate
+ * messages inside strerror_r().
+ */
+ return strerror_r(errnum, buf, buflen);
+#endif
+}
--
2.43.0
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static void translate(locale_t loc, int err);
static char *get_errno_symbol(int errnum);
/*
 * Parse a command-line argument as a base-10 int.
 *
 * Returns 1 and stores the value in *out on success.  Returns 0 if the
 * string contains no digits, has trailing characters after the number, or
 * the value does not fit in an int.
 */
static int
parse_int_arg(const char *arg, int *out)
{
    char       *end;
    long        val;

    /* strtol reports overflow only through errno, so clear it first */
    errno = 0;
    val = strtol(arg, &end, 10);

    if (end == arg || *end != '\0' || errno == ERANGE ||
        val < INT_MIN || val > INT_MAX)
        return 0;

    *out = (int) val;
    return 1;
}

/*
 * Emit .po-style entries for every errno in the half-open range
 * [min_errno, max_errno), where both bounds are given on the command line.
 *
 * LC_CTYPE and LC_MESSAGES are taken from the environment, so the "msgstr"
 * lines reflect whatever translation strerror() produces in that locale,
 * while a separate "C" locale object is used to obtain the untranslated
 * "msgid" text.
 *
 * Exits with status 1 on bad usage, unparsable arguments, an empty range, or
 * failure to create the "C" locale; returns 0 otherwise.
 */
int
main(int argc, char **argv)
{
    int         min_err;
    int         max_err;
    locale_t    c_loc;

    setlocale(LC_CTYPE, "");
    setlocale(LC_MESSAGES, "");

    if (argc != 3)
    {
        fprintf(stderr, "usage:\n\terrno_translation min_errno max_errno\n");
        exit(1);
    }

    /* atoi() would silently turn garbage into 0; reject it instead */
    if (!parse_int_arg(argv[1], &min_err) ||
        !parse_int_arg(argv[2], &max_err))
    {
        fprintf(stderr, "min_errno and max_errno must be integers\n");
        exit(1);
    }

    if (min_err >= max_err)
    {
        fprintf(stderr, "min_err must be less than max_err\n");
        exit(1);
    }

    c_loc = newlocale(LC_ALL_MASK, "C", NULL);
    if (!c_loc)
    {
        fprintf(stderr, "can't create locale: C\n");
        exit(1);
    }

    /* max_err itself is excluded from the range */
    for (int i = min_err; i < max_err; i++)
        translate(c_loc, i);

    freelocale(c_loc);
    return 0;
}
/*
 * Print one .po-style entry for the given errno to stdout.
 *
 * The entry consists of an extracted comment naming the errno symbol (or the
 * numeric code when get_errno_symbol() does not recognize it), a "msgid"
 * line holding the strerror() text obtained while c_loc is the thread's
 * locale, and a "msgstr" line holding the strerror() text obtained under the
 * locale that was in effect on entry.
 *
 * c_loc: locale object to switch to while fetching the msgid text.
 * err:   errno value to describe.
 */
static void
translate(locale_t c_loc, int err)
{
    const char *errorname = get_errno_symbol(err);
    const char *msgid;
    const char *msgstr;
    locale_t    save_loc;

    /*
     * get_errno_symbol() returns NULL for codes it does not know; passing
     * NULL to "%s" is undefined behavior, so fall back to the number.
     */
    if (errorname != NULL)
        printf("#. %s\n", errorname);
    else
        printf("#. errno %d\n", err);

    save_loc = uselocale(c_loc);
    msgid = strerror(err);
    uselocale(save_loc);

    /*
     * Print msgid before calling strerror() again: the returned string may
     * be overwritten by a subsequent call.
     */
    printf("msgid \"%s\"\n", msgid);

    msgstr = strerror(err);
    printf("msgstr \"%s\"\n", msgstr);
    printf("\n");
}
/*
 * Map an errno code to its symbolic name.
 *
 * Looks errnum up in a fixed table of known codes and returns the matching
 * symbol as a string (e.g. "ENOENT").  Returns NULL when errnum is not in
 * the table.
 *
 * Codes that are conditionally compiled are those guarded by #ifdef below;
 * EOPNOTSUPP and EWOULDBLOCK are additionally skipped when they share a
 * value with ENOTSUP and EAGAIN respectively, so each value appears once.
 */
#define ERRNO_SYMBOL(sym) { sym, #sym }

static char *
get_errno_symbol(int errnum)
{
    static const struct
    {
        int         code;
        char       *symbol;
    }           known[] = {
        ERRNO_SYMBOL(E2BIG),
        ERRNO_SYMBOL(EACCES),
        ERRNO_SYMBOL(EADDRINUSE),
        ERRNO_SYMBOL(EADDRNOTAVAIL),
        ERRNO_SYMBOL(EAFNOSUPPORT),
#ifdef EAGAIN
        ERRNO_SYMBOL(EAGAIN),
#endif
#ifdef EALREADY
        ERRNO_SYMBOL(EALREADY),
#endif
        ERRNO_SYMBOL(EBADF),
#ifdef EBADMSG
        ERRNO_SYMBOL(EBADMSG),
#endif
        ERRNO_SYMBOL(EBUSY),
        ERRNO_SYMBOL(ECHILD),
        ERRNO_SYMBOL(ECONNABORTED),
        ERRNO_SYMBOL(ECONNREFUSED),
        ERRNO_SYMBOL(ECONNRESET),
        ERRNO_SYMBOL(EDEADLK),
        ERRNO_SYMBOL(EDOM),
        ERRNO_SYMBOL(EEXIST),
        ERRNO_SYMBOL(EFAULT),
        ERRNO_SYMBOL(EFBIG),
        ERRNO_SYMBOL(EHOSTDOWN),
        ERRNO_SYMBOL(EHOSTUNREACH),
        ERRNO_SYMBOL(EIDRM),
        ERRNO_SYMBOL(EINPROGRESS),
        ERRNO_SYMBOL(EINTR),
        ERRNO_SYMBOL(EINVAL),
        ERRNO_SYMBOL(EIO),
        ERRNO_SYMBOL(EISCONN),
        ERRNO_SYMBOL(EISDIR),
#ifdef ELOOP
        ERRNO_SYMBOL(ELOOP),
#endif
        ERRNO_SYMBOL(EMFILE),
        ERRNO_SYMBOL(EMLINK),
        ERRNO_SYMBOL(EMSGSIZE),
        ERRNO_SYMBOL(ENAMETOOLONG),
        ERRNO_SYMBOL(ENETDOWN),
        ERRNO_SYMBOL(ENETRESET),
        ERRNO_SYMBOL(ENETUNREACH),
        ERRNO_SYMBOL(ENFILE),
        ERRNO_SYMBOL(ENOBUFS),
        ERRNO_SYMBOL(ENODEV),
        ERRNO_SYMBOL(ENOENT),
        ERRNO_SYMBOL(ENOEXEC),
        ERRNO_SYMBOL(ENOMEM),
        ERRNO_SYMBOL(ENOSPC),
        ERRNO_SYMBOL(ENOSYS),
        ERRNO_SYMBOL(ENOTCONN),
        ERRNO_SYMBOL(ENOTDIR),
        ERRNO_SYMBOL(ENOTEMPTY),
        ERRNO_SYMBOL(ENOTSOCK),
#ifdef ENOTSUP
        ERRNO_SYMBOL(ENOTSUP),
#endif
        ERRNO_SYMBOL(ENOTTY),
        ERRNO_SYMBOL(ENXIO),
#if defined(EOPNOTSUPP) && (!defined(ENOTSUP) || (EOPNOTSUPP != ENOTSUP))
        ERRNO_SYMBOL(EOPNOTSUPP),
#endif
#ifdef EOVERFLOW
        ERRNO_SYMBOL(EOVERFLOW),
#endif
        ERRNO_SYMBOL(EPERM),
        ERRNO_SYMBOL(EPIPE),
        ERRNO_SYMBOL(EPROTONOSUPPORT),
        ERRNO_SYMBOL(ERANGE),
#ifdef EROFS
        ERRNO_SYMBOL(EROFS),
#endif
        ERRNO_SYMBOL(ESRCH),
        ERRNO_SYMBOL(ETIMEDOUT),
#ifdef ETXTBSY
        ERRNO_SYMBOL(ETXTBSY),
#endif
#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
        ERRNO_SYMBOL(EWOULDBLOCK),
#endif
        ERRNO_SYMBOL(EXDEV),
    };
    const size_t nknown = sizeof(known) / sizeof(known[0]);

    /* linear scan: the table is small and this is not a hot path */
    for (size_t idx = 0; idx < nknown; idx++)
    {
        if (known[idx].code == errnum)
            return known[idx].symbol;
    }

    /* not a code we know a symbol for */
    return NULL;
}

#undef ERRNO_SYMBOL