Module Name: src
Committed By: riastradh
Date: Sat Mar 29 19:40:42 UTC 2025
Modified Files:
src/lib/libc/gen: ctype_.c tolower_.c toupper_.c
src/tests/lib/libc/gen: t_ctype.c
Added Files:
src/lib/libc/gen: ctype_guard.h
Log Message:
ctype(3): Put guard pages before the C ctype/tolower/toupper tables.
This also only affects machines where char is signed for now. (But
maybe it would be worth doing unconditionally; users could still try
to pass in explicit `signed char' inputs.)
PR lib/58208: ctype(3) provides poor runtime feedback of abuse
To generate a diff of this commit:
cvs rdiff -u -r1.20 -r1.21 src/lib/libc/gen/ctype_.c
cvs rdiff -u -r0 -r1.1 src/lib/libc/gen/ctype_guard.h
cvs rdiff -u -r1.14 -r1.15 src/lib/libc/gen/tolower_.c \
src/lib/libc/gen/toupper_.c
cvs rdiff -u -r1.9 -r1.10 src/tests/lib/libc/gen/t_ctype.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/lib/libc/gen/ctype_.c
diff -u src/lib/libc/gen/ctype_.c:1.20 src/lib/libc/gen/ctype_.c:1.21
--- src/lib/libc/gen/ctype_.c:1.20 Sat Apr 13 10:21:20 2013
+++ src/lib/libc/gen/ctype_.c Sat Mar 29 19:40:42 2025
@@ -1,4 +1,4 @@
-/* $NetBSD: ctype_.c,v 1.20 2013/04/13 10:21:20 joerg Exp $ */
+/* $NetBSD: ctype_.c,v 1.21 2025/03/29 19:40:42 riastradh Exp $ */
/*
* Copyright (c) 1989 The Regents of the University of California.
@@ -39,12 +39,16 @@
#if 0
/*static char *sccsid = "from: @(#)ctype_.c 5.6 (Berkeley) 6/1/90";*/
#else
-__RCSID("$NetBSD: ctype_.c,v 1.20 2013/04/13 10:21:20 joerg Exp $");
+__RCSID("$NetBSD: ctype_.c,v 1.21 2025/03/29 19:40:42 riastradh Exp $");
#endif
#endif /* LIBC_SCCS and not lint */
#include <sys/ctype_bits.h>
+#include <sys/mman.h>
+
#include <stdio.h>
+
+#include "ctype_guard.h"
#include "ctype_local.h"
#if EOF != -1
@@ -61,8 +65,12 @@ __RCSID("$NetBSD: ctype_.c,v 1.20 2013/0
#define _B _COMPAT_B
#define _N _COMPAT_N
-const unsigned char _C_compat_bsdctype[1 + _CTYPE_NUM_CHARS] = {
- 0,
+__ctype_table_guarded(_C_compat_bsdctype, _C_compat_bsdctype_guarded);
+__ctype_table
+static
+const unsigned char _C_compat_bsdctype_guarded[_C_COMPAT_BSDCTYPE_GUARD +
+ 1 + _CTYPE_NUM_CHARS] = {
+ [_C_COMPAT_BSDCTYPE_GUARD] = 0,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C|_S, _C|_S, _C|_S, _C|_S, _C|_S, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
@@ -109,8 +117,11 @@ const unsigned char *_ctype_ = &_C_compa
#define _U _CTYPE_U
#define _X _CTYPE_X
-const unsigned short _C_ctype_tab_[1 + _CTYPE_NUM_CHARS] = {
- 0,
+__ctype_table_guarded(_C_ctype_tab_, _C_ctype_tab_guarded_);
+__ctype_table
+static const unsigned short _C_ctype_tab_guarded_[_C_CTYPE_TAB_GUARD +
+ 1 + _CTYPE_NUM_CHARS] = {
+ [_C_CTYPE_TAB_GUARD] = 0,
_C, _C, _C, _C,
_C, _C, _C, _C,
_C, _BL|_C|_S, _C|_S, _C|_S,
@@ -158,3 +169,18 @@ const unsigned short _C_ctype_tab_[1 + _
#undef _X
const unsigned short *_ctype_tab_ = &_C_ctype_tab_[0];
+
+#if _CTYPE_GUARD_PAGE
+__attribute__((constructor))
+static void
+_C_ctype_tab_guard_init(void)
+{
+
+#ifdef __BUILD_LEGACY
+ (void)mprotect(__UNCONST(_C_compat_bsdctype_guarded),
+ _CTYPE_GUARD_SIZE, PROT_NONE);
+#endif /* __BUILD_LEGACY */
+ (void)mprotect(__UNCONST(_C_ctype_tab_guarded_),
+ _CTYPE_GUARD_SIZE, PROT_NONE);
+}
+#endif /* _CTYPE_GUARD_PAGE */
Index: src/lib/libc/gen/tolower_.c
diff -u src/lib/libc/gen/tolower_.c:1.14 src/lib/libc/gen/tolower_.c:1.15
--- src/lib/libc/gen/tolower_.c:1.14 Sat Apr 13 10:16:27 2013
+++ src/lib/libc/gen/tolower_.c Sat Mar 29 19:40:42 2025
@@ -1,4 +1,4 @@
-/* $NetBSD: tolower_.c,v 1.14 2013/04/13 10:16:27 joerg Exp $ */
+/* $NetBSD: tolower_.c,v 1.15 2025/03/29 19:40:42 riastradh Exp $ */
/*
* Written by J.T. Conklin <[email protected]>.
@@ -7,19 +7,26 @@
#include <sys/cdefs.h>
#if defined(LIBC_RCS) && !defined(lint)
-__RCSID("$NetBSD: tolower_.c,v 1.14 2013/04/13 10:16:27 joerg Exp $");
+__RCSID("$NetBSD: tolower_.c,v 1.15 2025/03/29 19:40:42 riastradh Exp $");
#endif /* LIBC_RCS and not lint */
#include <sys/ctype_bits.h>
+#include <sys/mman.h>
+
#include <stdio.h>
+
+#include "ctype_guard.h"
#include "ctype_local.h"
#if EOF != -1
#error "EOF != -1"
#endif
-const short _C_tolower_tab_[1 + _CTYPE_NUM_CHARS] = {
- EOF,
+__ctype_table_guarded(_C_tolower_tab_, _C_tolower_tab_guarded_);
+__ctype_table
+static const short _C_tolower_tab_guarded_[_C_TOLOWER_TAB_GUARD +
+ 1 + _CTYPE_NUM_CHARS] = {
+ [_C_TOLOWER_TAB_GUARD] = EOF,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
@@ -61,3 +68,14 @@ __weak_alias(_C_tolower_, _C_tolower_tab
#endif
const short *_tolower_tab_ = &_C_tolower_tab_[0];
+
+#if _CTYPE_GUARD_PAGE
+__attribute__((constructor))
+static void
+_C_tolower_tab_guard_init(void)
+{
+
+ (void)mprotect(__UNCONST(_C_tolower_tab_guarded_), _CTYPE_GUARD_SIZE,
+ PROT_NONE);
+}
+#endif /* _CTYPE_GUARD_PAGE */
Index: src/lib/libc/gen/toupper_.c
diff -u src/lib/libc/gen/toupper_.c:1.14 src/lib/libc/gen/toupper_.c:1.15
--- src/lib/libc/gen/toupper_.c:1.14 Sat Apr 13 10:16:27 2013
+++ src/lib/libc/gen/toupper_.c Sat Mar 29 19:40:42 2025
@@ -1,4 +1,4 @@
-/* $NetBSD: toupper_.c,v 1.14 2013/04/13 10:16:27 joerg Exp $ */
+/* $NetBSD: toupper_.c,v 1.15 2025/03/29 19:40:42 riastradh Exp $ */
/*
* Written by J.T. Conklin <[email protected]>.
@@ -7,19 +7,26 @@
#include <sys/cdefs.h>
#if defined(LIBC_RCS) && !defined(lint)
-__RCSID("$NetBSD: toupper_.c,v 1.14 2013/04/13 10:16:27 joerg Exp $");
+__RCSID("$NetBSD: toupper_.c,v 1.15 2025/03/29 19:40:42 riastradh Exp $");
#endif /* LIBC_RCS and not lint */
#include <sys/ctype_bits.h>
+#include <sys/mman.h>
+
#include <stdio.h>
+
+#include "ctype_guard.h"
#include "ctype_local.h"
#if EOF != -1
#error "EOF != -1"
#endif
-const short _C_toupper_tab_[1 + _CTYPE_NUM_CHARS] = {
- EOF,
+__ctype_table_guarded(_C_toupper_tab_, _C_toupper_tab_guarded_);
+__ctype_table
+static const short _C_toupper_tab_guarded_[_C_TOUPPER_TAB_GUARD +
+ 1 + _CTYPE_NUM_CHARS] = {
+ [_C_TOUPPER_TAB_GUARD] = EOF,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
@@ -61,3 +68,14 @@ __weak_alias(_C_toupper_, _C_toupper_tab
#endif
const short *_toupper_tab_ = &_C_toupper_tab_[0];
+
+#if _CTYPE_GUARD_PAGE
+__attribute__((constructor))
+static void
+_C_toupper_tab_guard_init(void)
+{
+
+ (void)mprotect(__UNCONST(_C_toupper_tab_guarded_), _CTYPE_GUARD_SIZE,
+ PROT_NONE);
+}
+#endif /* _CTYPE_GUARD_PAGE */
Index: src/tests/lib/libc/gen/t_ctype.c
diff -u src/tests/lib/libc/gen/t_ctype.c:1.9 src/tests/lib/libc/gen/t_ctype.c:1.10
--- src/tests/lib/libc/gen/t_ctype.c:1.9 Sat Mar 29 01:06:36 2025
+++ src/tests/lib/libc/gen/t_ctype.c Sat Mar 29 19:40:42 2025
@@ -1,4 +1,4 @@
-/* $NetBSD: t_ctype.c,v 1.9 2025/03/29 01:06:36 riastradh Exp $ */
+/* $NetBSD: t_ctype.c,v 1.10 2025/03/29 19:40:42 riastradh Exp $ */
/*-
* Copyright (c) 2025 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__RCSID("$NetBSD: t_ctype.c,v 1.9 2025/03/29 01:06:36 riastradh Exp $");
+__RCSID("$NetBSD: t_ctype.c,v 1.10 2025/03/29 19:40:42 riastradh Exp $");
#include <atf-c.h>
#include <ctype.h>
@@ -112,13 +112,7 @@ test_abuse_in_locales(const char *name,
ATF_REQUIRE_MSG(setlocale(LC_CTYPE, locales[i]) != NULL,
"locales[i]=%s", locales[i]);
snprintf(buf, sizeof(buf), "[%s]%s", locales[i], name);
- if (macro && strcmp(locales[i], "C") == 0) {
- atf_tc_expect_fail("PR lib/58208: ctype(3)"
- " provides poor runtime feedback of abuse");
- }
test_abuse(buf, ctypefn);
- if (strcmp(locales[i], "C") == 0)
- atf_tc_expect_pass();
}
}
@@ -789,8 +783,6 @@ ATF_TC_BODY(abuse_##FN##_macro_c, tc)
atf_tc_skip("runtime ctype(3) abuse is impossible with" \
" unsigned char"); \
} \
- atf_tc_expect_fail("PR lib/58208:" \
- " ctype(3) provides poor runtime feedback of abuse"); \
test_abuse(#FN, &FN##_wrapper); \
} \
ATF_TC(abuse_##FN##_function_c); \
Added files:
Index: src/lib/libc/gen/ctype_guard.h
diff -u /dev/null src/lib/libc/gen/ctype_guard.h:1.1
--- /dev/null Sat Mar 29 19:40:42 2025
+++ src/lib/libc/gen/ctype_guard.h Sat Mar 29 19:40:42 2025
@@ -0,0 +1,163 @@
+/* $NetBSD: ctype_guard.h,v 1.1 2025/03/29 19:40:42 riastradh Exp $ */
+
+/*-
+ * Copyright (c) 2025 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LIBC_CTYPE_GUARD_H_
+#define _LIBC_CTYPE_GUARD_H_
+
+#include <sys/cdefs.h>
+
+#include "ctype_local.h"
+
+/*
+ * On platforms where char is signed, the ctype(3) functions have an
+ * insidious trap where logic like
+ *
+ * char *s = ...;
+ *
+ * if (isspace(*s)) ...
+ *
+ * is undefined behaviour whenever the string s has any bytes outside
+ * the 7-bit US-ASCII range. The correct way to do this is to cast any
+ * char inputs to unsigned char first:
+ *
+ * if (isspace((unsigned char)*s)) ...
+ *
+ * Unfortunately, the buggy idiom without an unsigned char cast is
+ * extremely prevalent in C code in the wild. (See the ctype(3) man
+ * page for details on why this idiom is required and why isspace
+ * itself can't just be `fixed'.)
+ *
+ * The ctype(3) functions are implemented as macros that expand to
+ * simple table lookups -- partly for performance, and partly to
+ * deliberately trigger -Wchar-subscript compiler warnings for
+ * suspicious code like the above.
+ *
+ * To noisily detect the undefined behaviour without incurring overhead
+ * for the defined cases of the ctype(3) functions, we put a guard page
+ * before the tables in memory so that (at least for inputs other than
+ * 0xff, which coincides as signed char with EOF = -1) attempts to use
+ * the ctype(3) functions on invalid inputs will reliably -- and
+ * safely! -- crash with SIGSEGV instead of sometimes returning
+ * nondeterministic nonsense results.
+ *
+ * We do this by defining two symbols, one local and one global:
+ *
+ * - The local (C `static') symbol _C_ctype_tab_guarded_ is
+ * page-aligned and has an extra page-sized buffer at the
+ * beginning for a guard page. It is defined as an ordinary C
+ * array to keep the source code legible.
+ *
+ * - The global symbol _C_ctype_tab_ starts one page past the
+ * start of _C_ctype_tab_guarded_. It is defined, via some
+ * macros for convenience, by the assembler directives:
+ *
+ * .type _C_ctype_tab_,@object
+ * .global _C_ctype_tab_
+ * _C_ctype_tab_ = _C_ctype_tab_guarded_ + PAGE_SIZE
+ *
+ * (with PAGE_SIZE being replaced by the numeric value from
+ * vmparam.h -- actually, we use MAX_PAGE_SIZE for the handful
+ * of architectures that support different page sizes on
+ * different machines).
+ *
+ * Then, at startup, we mprotect the guard page PROT_NONE.
+ *
+ * The global symbol _C_ctype_tab_ is an immutable part of the libc
+ * ABI, and is used by, e.g., libstdc++.so, so we have to define it
+ * compatibly -- this is why it must be defined as an ELF global symbol
+ * in its own right, and not simply handled inside libc.so by
+ * additional arithmetic relative to _C_ctype_tab_guarded_.
+ */
+
+#ifndef __CHAR_UNSIGNED__
+# define _CTYPE_GUARD_PAGE 1
+#else
+# define _CTYPE_GUARD_PAGE 0
+#endif
+
+#ifdef __arm__
+# define __ctype_table_object(name) \
+ __asm(".type " #name ",%object")
+#else
+# define __ctype_table_object(name) \
+ __asm(".type " #name ",@object")
+#endif
+
+#if _CTYPE_GUARD_PAGE
+
+# include <machine/vmparam.h>
+
+/*
+ * _CTYPE_GUARD_SIZE must be a macro so it will work through ___STRING
+ * to produce a string for symbol arithmetic in __asm.
+ */
+# ifdef MAX_PAGE_SIZE
+# define _CTYPE_GUARD_SIZE MAX_PAGE_SIZE
+# else
+# define _CTYPE_GUARD_SIZE PAGE_SIZE
+# endif
+
+enum {
+ _C_CTYPE_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_ctype_tab_[0]),
+# ifdef __BUILD_LEGACY
+ _C_COMPAT_BSDCTYPE_GUARD =
+ _CTYPE_GUARD_SIZE/sizeof(_C_compat_bsdctype[0]),
+# endif
+ _C_TOLOWER_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_tolower_tab_[0]),
+ _C_TOUPPER_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_toupper_tab_[0]),
+};
+
+# define __ctype_table __aligned(_CTYPE_GUARD_SIZE)
+# define __ctype_table_guarded(name, guard) \
+ __ctype_table_object(name); \
+ __asm(".global " _C_LABEL_STRING(#name)); \
+ __asm(_C_LABEL_STRING(#name) " = " _C_LABEL_STRING(#guard) " + " \
+ ___STRING(_CTYPE_GUARD_SIZE))
+
+#else /* !_CTYPE_GUARD_PAGE */
+
+# define _CTYPE_GUARD_SIZE 0
+
+enum {
+ _C_CTYPE_TAB_GUARD = 0,
+# ifdef __BUILD_LEGACY
+ _C_COMPAT_BSDCTYPE_GUARD = 0,
+# endif
+ _C_TOLOWER_TAB_GUARD = 0,
+ _C_TOUPPER_TAB_GUARD = 0,
+};
+
+/* Compiler can't see into __strong_alias, so mark it __used. */
+# define __ctype_table __used
+# define __ctype_table_guarded(name, guard) \
+ __ctype_table_object(name); \
+ __strong_alias(name, guard)
+
+#endif /* _CTYPE_GUARD_PAGE */
+
+#endif /* _LIBC_CTYPE_GUARD_H_ */