Module Name: src
Committed By: rin
Date: Fri Jan 5 02:38:06 UTC 2024
Modified Files:
src/usr.bin/mklocale: mklocale.1 yacc.y
Log Message:
mklocale(1): Add range check for TODIGIT, rather than disabling it
PR lib/57798
The digit value specified by TODIGIT is stored in the lowest 8 bits of
_RuneType; see lib/libc/locale/runetype_file.h:
https://nxr.netbsd.org/xref/src/lib/libc/locale/runetype_file.h#56
The symptom reported in the PR is due to missing range check for
this value; values of 256 and above were mistakenly treated as
other flag bits in _RuneType.
For example, U+5146 has the numerical value 1,000,000,000,000 ==
0xe8d4a51000, in which __BITS(30, 31) == _RUNETYPE_SW3 are turned on.
This is why wcwidth(3) returned 3 for this character.
This apparently affected not only the character width, but also other
attributes stored in _RuneType.
IIUC, the digit value attributes in _RuneType have never been utilized
until now, but preserve them when the digit is in the range 1..255. This
should be safer for pulling this up into netbsd-10. Also, these
attributes may be useful to implement some I18N features as
suggested by uwe@ in the PR.
netbsd-[98] are not affected, as they use the old UTF-8 ctype definitions.
To generate a diff of this commit:
cvs rdiff -u -r1.18 -r1.19 src/usr.bin/mklocale/mklocale.1
cvs rdiff -u -r1.35 -r1.36 src/usr.bin/mklocale/yacc.y
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/usr.bin/mklocale/mklocale.1
diff -u src/usr.bin/mklocale/mklocale.1:1.18 src/usr.bin/mklocale/mklocale.1:1.19
--- src/usr.bin/mklocale/mklocale.1:1.18 Thu Dec 28 03:49:35 2023
+++ src/usr.bin/mklocale/mklocale.1 Fri Jan 5 02:38:06 2024
@@ -1,4 +1,4 @@
-.\" $NetBSD: mklocale.1,v 1.18 2023/12/28 03:49:35 rin Exp $
+.\" $NetBSD: mklocale.1,v 1.19 2024/01/05 02:38:06 rin Exp $
.\" FreeBSD: src/usr.bin/mklocale/mklocale.1,v 1.6 1999/09/20 09:15:21 phantom Exp
.\"
.\" Copyright (c) 1993, 1994
@@ -33,7 +33,7 @@
.\"
.\" @(#)mklocale.1 8.2 (Berkeley) 4/18/94
.\"
-.Dd December 28, 2023
+.Dd January 5, 2024
.Dt MKLOCALE 1
.Os
.Sh NAME
@@ -212,7 +212,9 @@ For example, the ASCII character
would map to the decimal value 0.
On
.Nx ,
-this information is ignored and not put into the binary output file.
+this information has never been used until now.
+Only values up to 255 are allowed, and mapping to 256 and above is
+silently ignored.
.El
.Pp
The following keywords may appear multiple times and have the following
Index: src/usr.bin/mklocale/yacc.y
diff -u src/usr.bin/mklocale/yacc.y:1.35 src/usr.bin/mklocale/yacc.y:1.36
--- src/usr.bin/mklocale/yacc.y:1.35 Thu Dec 28 03:49:35 2023
+++ src/usr.bin/mklocale/yacc.y Fri Jan 5 02:38:06 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: yacc.y,v 1.35 2023/12/28 03:49:35 rin Exp $ */
+/* $NetBSD: yacc.y,v 1.36 2024/01/05 02:38:06 rin Exp $ */
%{
/*-
@@ -43,7 +43,7 @@
static char sccsid[] = "@(#)yacc.y 8.1 (Berkeley) 6/6/93";
static char rcsid[] = "$FreeBSD$";
#else
-__RCSID("$NetBSD: yacc.y,v 1.35 2023/12/28 03:49:35 rin Exp $");
+__RCSID("$NetBSD: yacc.y,v 1.36 2024/01/05 02:38:06 rin Exp $");
#endif
#endif /* not lint */
@@ -82,9 +82,7 @@ __nbrune_t charsetmask = (__nbrune_t)0x0
__nbrune_t charsetmask = (__nbrune_t)0xffffffff;
void set_map(rune_map *, rune_list *, u_int32_t);
-#if 0
void set_digitmap(rune_map *, rune_list *);
-#endif
void add_map(rune_map *, rune_list *, u_int32_t);
__dead void usage(void);
@@ -189,19 +187,8 @@ entry : ENCODING STRING
{ set_map(&maplower, $2, 0); }
| MAPUPPER map
{ set_map(&mapupper, $2, 0); }
-/*
- * XXX PR lib/57798
- * set_digitmap() was implemented with an assumption that
- * all characters are mapped to numerical values <= 255.
- * This is no longer true for Unicode, and results in, e.g.,
- * wrong return values of wcwidth(3) for U+5146 or U+16B60.
- *
- * | DIGITMAP map
- * { set_digitmap(&types, $2); }
- *
- */
- | DIGITMAP mapignore
- { }
+ | DIGITMAP map
+ { set_digitmap(&types, $2); }
;
list : RUNE
@@ -267,12 +254,6 @@ map : LBRK RUNE RUNE RBRK
$$->next = $1;
}
;
-
-mapignore : LBRK RUNE RUNE RBRK { }
- | map LBRK RUNE RUNE RBRK { }
- | LBRK RUNE THRU RUNE ':' RUNE RBRK { }
- | map LBRK RUNE THRU RUNE ':' RUNE RBRK { }
- ;
%%
int debug = 0;
@@ -401,7 +382,6 @@ set_map(rune_map *map, rune_list *list,
}
}
-#if 0
void
set_digitmap(rune_map *map, rune_list *list)
{
@@ -410,18 +390,24 @@ set_digitmap(rune_map *map, rune_list *l
while (list) {
rune_list *nlist = list->next;
for (i = list->min; i <= list->max; ++i) {
- if (list->map + (i - list->min)) {
+ /*
+ * XXX PR lib/57798
+ * Currently, we support mapping up to 255. Attempts to map
+ * 256 (== _RUNETYPE_A) and above are silently ignored.
+ */
+ _RuneType digit = list->map + (i - list->min);
+ if (digit > 0 && digit <= 0xff) {
rune_list *tmp = (rune_list *)xmalloc(sizeof(rune_list));
+ memset(tmp, 0, sizeof(*tmp));
tmp->min = i;
tmp->max = i;
- add_map(map, tmp, list->map + (i - list->min));
+ add_map(map, tmp, digit);
}
}
free(list);
list = nlist;
}
}
-#endif
void
add_map(rune_map *map, rune_list *list, u_int32_t flag)