Module Name: src
Committed By: rillig
Date: Sat Feb 3 19:25:16 UTC 2024
Modified Files:
src/usr.bin/xlint/lint1: ckgetopt.c debug.c emit1.c externs1.h init.c
lex.c lint1.h tree.c
Log Message:
lint: keep strings in their source representation
This allows further analysis depending on whether individual characters are
escaped as octal, hexadecimal or not at all.
To generate a diff of this commit:
cvs rdiff -u -r1.21 -r1.22 src/usr.bin/xlint/lint1/ckgetopt.c
cvs rdiff -u -r1.69 -r1.70 src/usr.bin/xlint/lint1/debug.c
cvs rdiff -u -r1.84 -r1.85 src/usr.bin/xlint/lint1/emit1.c
cvs rdiff -u -r1.214 -r1.215 src/usr.bin/xlint/lint1/externs1.h
cvs rdiff -u -r1.257 -r1.258 src/usr.bin/xlint/lint1/init.c
cvs rdiff -u -r1.211 -r1.212 src/usr.bin/xlint/lint1/lex.c \
src/usr.bin/xlint/lint1/lint1.h
cvs rdiff -u -r1.601 -r1.602 src/usr.bin/xlint/lint1/tree.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/usr.bin/xlint/lint1/ckgetopt.c
diff -u src/usr.bin/xlint/lint1/ckgetopt.c:1.21 src/usr.bin/xlint/lint1/ckgetopt.c:1.22
--- src/usr.bin/xlint/lint1/ckgetopt.c:1.21 Sat Feb 3 12:57:12 2024
+++ src/usr.bin/xlint/lint1/ckgetopt.c Sat Feb 3 19:25:16 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: ckgetopt.c,v 1.21 2024/02/03 12:57:12 rillig Exp $ */
+/* $NetBSD: ckgetopt.c,v 1.22 2024/02/03 19:25:16 rillig Exp $ */
/*-
* Copyright (c) 2021 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
#include <sys/cdefs.h>
#if defined(__RCSID)
-__RCSID("$NetBSD: ckgetopt.c,v 1.21 2024/02/03 12:57:12 rillig Exp $");
+__RCSID("$NetBSD: ckgetopt.c,v 1.22 2024/02/03 19:25:16 rillig Exp $");
#endif
#include <stdbool.h>
@@ -100,7 +100,12 @@ is_getopt_condition(const tnode_t *tn, c
&& last_arg->tn_left->tn_op == ADDR
&& last_arg->tn_left->tn_left->tn_op == STRING
&& (str = last_arg->tn_left->tn_left->tn_string)->data != NULL) {
- *out_options = xstrdup(str->data);
+ buffer buf;
+ buf_init(&buf);
+ quoted_iterator it = { .start = 0 };
+ while (quoted_next(str, &it))
+ buf_add_char(&buf, (char)it.value);
+ *out_options = buf.data;
return true;
}
return false;
Index: src/usr.bin/xlint/lint1/debug.c
diff -u src/usr.bin/xlint/lint1/debug.c:1.69 src/usr.bin/xlint/lint1/debug.c:1.70
--- src/usr.bin/xlint/lint1/debug.c:1.69 Fri Feb 2 16:25:58 2024
+++ src/usr.bin/xlint/lint1/debug.c Sat Feb 3 19:25:16 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: debug.c,v 1.69 2024/02/02 16:25:58 rillig Exp $ */
+/* $NetBSD: debug.c,v 1.70 2024/02/03 19:25:16 rillig Exp $ */
/*-
* Copyright (c) 2021 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
#include <sys/cdefs.h>
#if defined(__RCSID)
-__RCSID("$NetBSD: debug.c,v 1.69 2024/02/02 16:25:58 rillig Exp $");
+__RCSID("$NetBSD: debug.c,v 1.70 2024/02/03 19:25:16 rillig Exp $");
#endif
#include <stdlib.h>
@@ -235,11 +235,10 @@ debug_node(const tnode_t *tn) // NOLINT(
debug_printf("\n");
break;
case STRING:
- debug_printf(", length %zu", tn->tn_string->len);
if (tn->tn_string->data != NULL)
- // TODO: May contain \0 or control characters.
- debug_printf(", \"%s\"", tn->tn_string->data);
- debug_printf("\n");
+ debug_printf(", %s\n", tn->tn_string->data);
+ else
+ debug_printf(", length %zu\n", tn->tn_string->len);
break;
default:
debug_printf("\n");
Index: src/usr.bin/xlint/lint1/emit1.c
diff -u src/usr.bin/xlint/lint1/emit1.c:1.84 src/usr.bin/xlint/lint1/emit1.c:1.85
--- src/usr.bin/xlint/lint1/emit1.c:1.84 Sat Feb 3 12:57:12 2024
+++ src/usr.bin/xlint/lint1/emit1.c Sat Feb 3 19:25:16 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: emit1.c,v 1.84 2024/02/03 12:57:12 rillig Exp $ */
+/* $NetBSD: emit1.c,v 1.85 2024/02/03 19:25:16 rillig Exp $ */
/*
* Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved.
@@ -38,9 +38,11 @@
#include <sys/cdefs.h>
#if defined(__RCSID)
-__RCSID("$NetBSD: emit1.c,v 1.84 2024/02/03 12:57:12 rillig Exp $");
+__RCSID("$NetBSD: emit1.c,v 1.85 2024/02/03 19:25:16 rillig Exp $");
#endif
+#include <stdlib.h>
+
#include "lint1.h"
static void outtt(sym_t *, sym_t *);
@@ -367,10 +369,17 @@ outcall(const tnode_t *tn, bool retval_u
} else if (arg->tn_op == ADDR &&
arg->tn_left->tn_op == STRING &&
arg->tn_left->tn_string->data != NULL) {
- /* constant string, write all format specifiers */
+ buffer buf;
+ buf_init(&buf);
+ quoted_iterator it = { .start = 0 };
+ while (quoted_next(arg->tn_left->tn_string, &it))
+ buf_add_char(&buf, (char)it.value);
+
+ /* string literal, write all format specifiers */
outchar('s');
outint(n);
- outfstrg(arg->tn_left->tn_string->data);
+ outfstrg(buf.data);
+ free(buf.data);
}
}
outchar((char)(retval_discarded ? 'd' : retval_used ? 'u' : 'i'));
Index: src/usr.bin/xlint/lint1/externs1.h
diff -u src/usr.bin/xlint/lint1/externs1.h:1.214 src/usr.bin/xlint/lint1/externs1.h:1.215
--- src/usr.bin/xlint/lint1/externs1.h:1.214 Sat Feb 3 12:57:12 2024
+++ src/usr.bin/xlint/lint1/externs1.h Sat Feb 3 19:25:16 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: externs1.h,v 1.214 2024/02/03 12:57:12 rillig Exp $ */
+/* $NetBSD: externs1.h,v 1.215 2024/02/03 19:25:16 rillig Exp $ */
/*
* Copyright (c) 1994, 1995 Jochen Pohl
@@ -396,6 +396,7 @@ void lex_comment(void);
void lex_slash_slash_comment(void);
void lex_unknown_character(int);
int lex_input(void);
+bool quoted_next(const buffer *, quoted_iterator *);
/*
* ckbool.c
Index: src/usr.bin/xlint/lint1/init.c
diff -u src/usr.bin/xlint/lint1/init.c:1.257 src/usr.bin/xlint/lint1/init.c:1.258
--- src/usr.bin/xlint/lint1/init.c:1.257 Thu Feb 1 18:37:06 2024
+++ src/usr.bin/xlint/lint1/init.c Sat Feb 3 19:25:16 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: init.c,v 1.257 2024/02/01 18:37:06 rillig Exp $ */
+/* $NetBSD: init.c,v 1.258 2024/02/03 19:25:16 rillig Exp $ */
/*
* Copyright (c) 1994, 1995 Jochen Pohl
@@ -38,7 +38,7 @@
#include <sys/cdefs.h>
#if defined(__RCSID)
-__RCSID("$NetBSD: init.c,v 1.257 2024/02/01 18:37:06 rillig Exp $");
+__RCSID("$NetBSD: init.c,v 1.258 2024/02/03 19:25:16 rillig Exp $");
#endif
#include <stdlib.h>
@@ -888,6 +888,12 @@ initialization_init_array_from_string(in
return false;
size_t len = tn->tn_string->len;
+ if (tn->tn_string->data != NULL) {
+ quoted_iterator it = { .start = 0 };
+ for (len = 0; quoted_next(tn->tn_string, &it); len++)
+ continue;
+ }
+
if (!tp->t_incomplete_array && (size_t)tp->t_dim < len) {
/* string literal too long (%lu) for target array (%lu) */
warning(187, (unsigned long)len, (unsigned long)tp->t_dim);
Index: src/usr.bin/xlint/lint1/lex.c
diff -u src/usr.bin/xlint/lint1/lex.c:1.211 src/usr.bin/xlint/lint1/lex.c:1.212
--- src/usr.bin/xlint/lint1/lex.c:1.211 Sat Feb 3 18:58:05 2024
+++ src/usr.bin/xlint/lint1/lex.c Sat Feb 3 19:25:16 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: lex.c,v 1.211 2024/02/03 18:58:05 rillig Exp $ */
+/* $NetBSD: lex.c,v 1.212 2024/02/03 19:25:16 rillig Exp $ */
/*
* Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved.
@@ -38,7 +38,7 @@
#include <sys/cdefs.h>
#if defined(__RCSID)
-__RCSID("$NetBSD: lex.c,v 1.211 2024/02/03 18:58:05 rillig Exp $");
+__RCSID("$NetBSD: lex.c,v 1.212 2024/02/03 19:25:16 rillig Exp $");
#endif
#include <ctype.h>
@@ -753,191 +753,263 @@ lex_operator(int t, op_t o)
return t;
}
-static int prev_byte = -1;
-
-static int
-read_escaped_oct(int c, bool wide)
+static buffer *
+read_quoted(bool *complete, bool wide, char delim)
{
- int n = 3;
- int value = 0;
- do {
- value = (value << 3) + (c - '0');
- c = read_byte();
- } while (--n > 0 && '0' <= c && c <= '7');
- prev_byte = c;
- if (value > TARG_UCHAR_MAX && !wide) {
- /* character escape does not fit in character */
- warning(76);
- value &= CHAR_MASK;
- }
- return value;
-}
-
-static int64_t
-read_escaped_hex(int c, bool wide)
-{
- if (!allow_c90)
- /* \x undefined in traditional C */
- warning(82);
- uint64_t value = 0;
- uint64_t mask = value_bits(wide ? 32 : CHAR_SIZE);
- int state = 0; /* 0 = no digits, 1 = OK, 2 = overflow */
- while (c = read_byte(), isxdigit(c)) {
- c = isdigit(c) ? c - '0' : toupper(c) - 'A' + 10;
- value = (value << 4) + c;
- if (state == 2)
- continue;
- if ((value & ~mask) != 0) {
- /* overflow in hex escape */
- warning(75);
- state = 2;
- } else {
- state = 1;
+ buffer *buf = xcalloc(1, sizeof(*buf));
+ buf_init(buf);
+ if (wide)
+ buf_add_char(buf, 'L');
+ buf_add_char(buf, delim);
+
+ for (;;) {
+ int c = read_byte();
+ if (c <= 0)
+ break;
+ buf_add_char(buf, (char)c);
+ if (c == '\n')
+ break;
+ if (c == delim) {
+ *complete = true;
+ return buf;
+ }
+ if (c == '\\') {
+ c = read_byte();
+ buf_add_char(buf, (char)(c <= 0 ? ' ' : c));
+ if (c <= 0)
+ break;
}
}
- prev_byte = c;
- if (state == 0) {
- /* no hex digits follow \x */
- error(74);
- }
- if (state == 2)
- value &= mask;
- return (int64_t)value;
+ *complete = false;
+ buf_add_char(buf, delim);
+ return buf;
}
-static int64_t
-read_escaped_backslash(int delim, bool wide)
+bool
+quoted_next(const buffer *lit, quoted_iterator *it)
{
- int c;
+ const char *s = lit->data;
+ size_t len = lit->len;
- switch (c = read_byte()) {
- case '"':
- if (!allow_c90 && delim == '\'')
- /* \" inside character constants undef... */
- warning(262);
- return '"';
- case '\'':
- return '\'';
- case '?':
- if (!allow_c90)
- /* \? undefined in traditional C */
- warning(263);
- return '?';
+ *it = (quoted_iterator){ .i = it->i, .start = it->i };
+
+ char delim = s[s[0] == 'L' ? 1 : 0];
+
+ bool in_the_middle = it->i > 0;
+ if (it->i == 0) {
+ it->start = s[0] == 'L' ? 2 : 1;
+ it->i = it->start;
+ }
+
+ for (;;) {
+ if (s[it->i] != delim)
+ break;
+ if (it->i + 1 == len)
+ return false;
+ it->next_literal = in_the_middle;
+ it->start += 2;
+ it->i += 2;
+ }
+
+again:
+ switch (s[it->i]) {
case '\\':
- return '\\';
+ it->i++;
+ goto backslash;
+ case '\n':
+ it->unescaped_newline = true;
+ return false;
+ default:
+ it->value = (unsigned char)s[it->i++];
+ return true;
+ }
+
+backslash:
+ it->escaped = true;
+ if ('0' <= s[it->i] && s[it->i] <= '7')
+ goto octal_escape;
+ switch (s[it->i++]) {
+ case '\n':
+ goto again;
case 'a':
- if (!allow_c90)
- /* \a undefined in traditional C */
- warning(81);
- return '\a';
+ it->named_escape = true;
+ it->value = '\a';
+ it->invalid_escape = !allow_c90;
+ return true;
case 'b':
- return '\b';
+ it->named_escape = true;
+ it->value = '\b';
+ return true;
case 'e':
- if (!allow_gcc)
- break;
- /* Not in the C standard yet, compilers recognize it */
- /* LINTED 79 */
- return '\e';
+ it->named_escape = true;
+ it->value = '\033';
+ it->invalid_escape = !allow_gcc;
+ return true;
case 'f':
- return '\f';
+ it->named_escape = true;
+ it->value = '\f';
+ return true;
case 'n':
- return '\n';
+ it->named_escape = true;
+ it->value = '\n';
+ return true;
case 'r':
- return '\r';
+ it->named_escape = true;
+ it->value = '\r';
+ return true;
case 't':
- return '\t';
+ it->named_escape = true;
+ it->value = '\t';
+ return true;
case 'v':
- if (!allow_c90)
- /* \v undefined in traditional C */
- warning(264);
- return '\v';
- case '8': case '9':
- /* bad octal digit '%c' */
- warning(77, c);
- /* FALLTHROUGH */
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- return read_escaped_oct(c, wide);
+ it->named_escape = true;
+ it->value = '\v';
+ it->invalid_escape = !allow_c90;
+ return true;
case 'x':
- return read_escaped_hex(c, wide);
- case '\n':
- return -3;
- case EOF:
- return -2;
+ goto hex_escape;
+ case '"':
+ it->literal_escape = true;
+ it->value = '"';
+ it->invalid_escape = !allow_c90 && delim == '\'';
+ return true;
+ case '?':
+ it->literal_escape = true;
+ it->value = '?';
+ it->invalid_escape = !allow_c90;
+ return true;
default:
- break;
+ it->invalid_escape = true;
+ /* FALLTHROUGH */
+ case '\'':
+ case '\\':
+ it->literal_escape = true;
+ it->value = (unsigned char)s[it->i - 1];
+ return true;
}
- if (isprint(c))
- /* dubious escape \%c */
- warning(79, c);
- else
- /* dubious escape \%o */
- warning(80, c);
- return c;
-}
-/*
- * Read a character which is part of a character constant or of a string
- * and handle escapes.
- *
- * 'delim' is '\'' for character constants and '"' for string literals.
- *
- * Returns -1 if the end of the character constant or string is reached,
- * -2 if the EOF is reached, and the character otherwise.
- */
-static int64_t
-get_escaped_char(int delim, bool wide)
-{
+octal_escape:
+ it->octal_digits++;
+ it->value = s[it->i++] - '0';
+ if ('0' <= s[it->i] && s[it->i] <= '7') {
+ it->octal_digits++;
+ it->value = 8 * it->value + (s[it->i++] - '0');
+ if ('0' <= s[it->i] && s[it->i] <= '7') {
+ it->octal_digits++;
+ it->value = 8 * it->value + (s[it->i++] - '0');
+ it->overflow = it->value > TARG_UCHAR_MAX
+ && s[0] != 'L';
+ }
+ }
+ return true;
- int64_t c = prev_byte;
- if (c != -1)
- prev_byte = -1;
- else
- c = read_byte();
+hex_escape:
+ for (;;) {
+ char ch = s[it->i];
+ unsigned digit_value;
+ if ('0' <= ch && ch <= '9')
+ digit_value = ch - '0';
+ else if ('A' <= ch && ch <= 'F')
+ digit_value = 10 + (ch - 'A');
+ else if ('a' <= ch && ch <= 'f')
+ digit_value = 10 + (ch - 'a');
+ else
+ break;
- if (c == delim)
- return -1;
- switch (c) {
- case '\n':
- /* newline in string or char constant */
- error(254);
- return -2;
- case '\0':
- /* syntax error '%s' */
- error(249, "EOF or null byte in literal");
- return -2;
- case EOF:
- return -2;
- case '\\':
- c = read_escaped_backslash(delim, wide);
- if (c == -3)
- return get_escaped_char(delim, wide);
- break;
- default:
- if (c != ' ' && (isspace(c) || iscntrl(c))) {
+ it->i++;
+ it->value = 16 * it->value + digit_value;
+ uint64_t limit = s[0] == 'L' ? TARG_UINT_MAX : TARG_UCHAR_MAX;
+ if (it->value > limit)
+ it->overflow = true;
+ if (it->hex_digits < 3)
+ it->hex_digits++;
+ }
+ it->missing_hex_digits = it->hex_digits == 0;
+ return true;
+}
+
+static void
+check_quoted(const buffer *buf, bool complete, char delim)
+{
+ quoted_iterator it = { .start = 0 };
+ while (quoted_next(buf, &it)) {
+ if (it.missing_hex_digits)
+ /* no hex digits follow \x */
+ error(74);
+ if (it.hex_digits > 0 && !allow_c90)
+ /* \x undefined in traditional C */
+ warning(82);
+ else if (!it.invalid_escape)
+ ;
+ else if (it.value == '8' || it.value == '9')
+ /* bad octal digit '%c' */
+ warning(77, (int)it.value);
+ else if (it.literal_escape && it.value == '?')
+ /* \? undefined in traditional C */
+ warning(263);
+ else if (it.literal_escape && it.value == '"')
+ /* \" inside character constants undefined in ... */
+ warning(262);
+ else if (it.named_escape && it.value == '\a')
+ /* \a undefined in traditional C */
+ warning(81);
+ else if (it.named_escape && it.value == '\v')
+ /* \v undefined in traditional C */
+ warning(264);
+ else {
+ unsigned char ch = buf->data[it.i - 1];
+ if (isprint(ch))
+ /* dubious escape \%c */
+ warning(79, ch);
+ else
+ /* dubious escape \%o */
+ warning(80, ch);
+ }
+ if (it.overflow && it.hex_digits > 0)
+ /* overflow in hex escape */
+ warning(75);
+ if (it.overflow && it.octal_digits > 0)
+ /* character escape does not fit in character */
+ warning(76);
+ if (it.value < ' ' && !it.escaped && complete)
/* invisible character U+%04X in %s */
- query_message(17, (unsigned int)c, delim == '"'
+ query_message(17, (unsigned)it.value, delim == '"'
? "string literal" : "character constant");
- }
}
- return c;
+ if (it.unescaped_newline)
+ /* newline in string or char constant */
+ error(254);
+ if (!complete && delim == '"')
+ /* unterminated string constant */
+ error(258);
+ if (!complete && delim == '\'')
+ /* unterminated character constant */
+ error(253);
+}
+
+static buffer *
+lex_quoted(char delim, bool wide)
+{
+ bool complete;
+ buffer *buf = read_quoted(&complete, wide, delim);
+ check_quoted(buf, complete, delim);
+ return buf;
}
/* Called if lex found a leading "'". */
int
lex_character_constant(void)
{
+ buffer *buf = lex_quoted('\'', false);
size_t n = 0;
- int64_t val = 0, c;
- while ((c = get_escaped_char('\'', false)) >= 0) {
- val = (int64_t)((uint64_t)val << CHAR_SIZE) + c;
+ uint64_t val = 0;
+ quoted_iterator it = { .start = 0 };
+ while (quoted_next(buf, &it)) {
+ val = (val << CHAR_SIZE) + it.value;
n++;
}
- if (c == -2) {
- /* unterminated character constant */
- error(253);
- } else if (n > sizeof(int) || (n > 1 && (pflag || hflag))) {
+ if (n > sizeof(int) || (n > 1 && (pflag || hflag))) {
/*
* XXX: ^^ should rather be sizeof(TARG_INT). Luckily,
* sizeof(int) is the same on all supported platforms.
@@ -947,17 +1019,19 @@ lex_character_constant(void)
} else if (n > 1) {
/* multi-character character constant */
warning(294);
- } else if (n == 0) {
+ } else if (n == 0 && !it.unescaped_newline) {
/* empty character constant */
error(73);
}
- if (n == 1)
- val = convert_integer(val, CHAR, CHAR_SIZE);
+
+ int64_t cval = n == 1
+ ? convert_integer((int64_t)val, CHAR, CHAR_SIZE)
+ : (int64_t)val;
yylval.y_val = xcalloc(1, sizeof(*yylval.y_val));
yylval.y_val->v_tspec = INT;
yylval.y_val->v_char_constant = true;
- yylval.y_val->u.integer = val;
+ yylval.y_val->u.integer = cval;
return T_CON;
}
@@ -968,22 +1042,20 @@ lex_character_constant(void)
int
lex_wide_character_constant(void)
{
- char buf[MB_LEN_MAX + 1];
- size_t nmax = MB_CUR_MAX;
+ buffer *buf = lex_quoted('\'', true);
- int64_t c;
- size_t n = 0;
- while ((c = get_escaped_char('\'', true)) >= 0) {
+ static char wbuf[MB_LEN_MAX + 1];
+ size_t n = 0, nmax = MB_CUR_MAX;
+
+ quoted_iterator it = { .start = 0 };
+ while (quoted_next(buf, &it)) {
if (n < nmax)
- buf[n] = (char)c;
+ wbuf[n] = (char)it.value;
n++;
}
wchar_t wc = 0;
- if (c == -2) {
- /* unterminated character constant */
- error(253);
- } else if (n == 0) {
+ if (n == 0) {
/* empty character constant */
error(73);
} else if (n > nmax) {
@@ -991,9 +1063,9 @@ lex_wide_character_constant(void)
/* too many characters in character constant */
error(71);
} else {
- buf[n] = '\0';
+ wbuf[n] = '\0';
(void)mbtowc(NULL, NULL, 0);
- if (mbtowc(&wc, buf, nmax) < 0)
+ if (mbtowc(&wc, wbuf, nmax) < 0)
/* invalid multibyte character */
error(291);
}
@@ -1246,18 +1318,7 @@ clear_warn_flags(void)
int
lex_string(void)
{
- buffer *buf = xcalloc(1, sizeof(*buf));
- buf_init(buf);
-
- int64_t c;
- while ((c = get_escaped_char('"', false)) >= 0)
- buf_add_char(buf, (char)c);
- if (c == -2)
- /* unterminated string constant */
- error(258);
-
-
- yylval.y_string = buf;
+ yylval.y_string = lex_quoted('"', false);
return T_STRING;
}
@@ -1283,20 +1344,18 @@ wide_length(const buffer *buf)
int
lex_wide_string(void)
{
- int64_t c;
+ buffer *buf = lex_quoted('"', true);
- buffer buf;
- buf_init(&buf);
- while ((c = get_escaped_char('"', true)) >= 0)
- buf_add_char(&buf, (char)c);
- if (c == -2)
- /* unterminated string constant */
- error(258);
+ buffer str;
+ buf_init(&str);
+ quoted_iterator it = { .start = 0 };
+ while (quoted_next(buf, &it))
+ buf_add_char(&str, (char)it.value);
- buffer *str = xcalloc(1, sizeof(*str));
- str->len = wide_length(&buf);
+ free(buf->data);
+ *buf = (buffer) { .len = wide_length(&str) };
- yylval.y_string = str;
+ yylval.y_string = buf;
return T_STRING;
}
Index: src/usr.bin/xlint/lint1/lint1.h
diff -u src/usr.bin/xlint/lint1/lint1.h:1.211 src/usr.bin/xlint/lint1/lint1.h:1.212
--- src/usr.bin/xlint/lint1/lint1.h:1.211 Thu Feb 1 18:37:06 2024
+++ src/usr.bin/xlint/lint1/lint1.h Sat Feb 3 19:25:16 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: lint1.h,v 1.211 2024/02/01 18:37:06 rillig Exp $ */
+/* $NetBSD: lint1.h,v 1.212 2024/02/03 19:25:16 rillig Exp $ */
/*
* Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved.
@@ -311,9 +311,14 @@ typedef struct tnode {
} tn_s;
sym_t *_tn_sym; /* symbol if op == NAME */
val_t _tn_val; /* value if op == CON */
- buffer *_tn_string; /* string if op == STRING; for wide
- * char strings, data is NULL but len
- * is valid */
+ buffer *_tn_string; /* string if op == STRING; for
+ * character strings, 'data' points to
+ * the concatenated string literals in
+ * source form, and 'len' is the
+ * length of the concatenation; for
+ * wide strings, 'data' is NULL and
+ * 'len' is the number of resulting
+ * characters */
} tn_u;
} tnode_t;
@@ -518,6 +523,22 @@ typedef enum {
LC_VARARGS,
} lint_comment;
+typedef struct {
+ size_t start;
+ size_t i;
+ uint64_t value;
+ bool escaped; /* \n, \003, \x24 */
+ bool named_escape; /* \a, \n, etc. */
+ bool literal_escape; /* \?, \\, etc. */
+ uint8_t octal_digits; /* 1 to 3; 0 means not applicable */
+ uint8_t hex_digits; /* 1 to 3; 0 means not applicable */
+ bool next_literal; /* when a new string literal begins */
+ bool invalid_escape; /* single-character escape, recoverable */
+ bool overflow; /* for octal and hex escapes */
+ bool missing_hex_digits;
+ bool unescaped_newline; /* stops iterating */
+} quoted_iterator;
+
#include "externs1.h"
#define lint_assert(cond) \
Index: src/usr.bin/xlint/lint1/tree.c
diff -u src/usr.bin/xlint/lint1/tree.c:1.601 src/usr.bin/xlint/lint1/tree.c:1.602
--- src/usr.bin/xlint/lint1/tree.c:1.601 Thu Feb 1 21:19:13 2024
+++ src/usr.bin/xlint/lint1/tree.c Sat Feb 3 19:25:16 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: tree.c,v 1.601 2024/02/01 21:19:13 rillig Exp $ */
+/* $NetBSD: tree.c,v 1.602 2024/02/03 19:25:16 rillig Exp $ */
/*
* Copyright (c) 1994, 1995 Jochen Pohl
@@ -37,7 +37,7 @@
#include <sys/cdefs.h>
#if defined(__RCSID)
-__RCSID("$NetBSD: tree.c,v 1.601 2024/02/01 21:19:13 rillig Exp $");
+__RCSID("$NetBSD: tree.c,v 1.602 2024/02/03 19:25:16 rillig Exp $");
#endif
#include <float.h>
@@ -522,14 +522,19 @@ build_name(sym_t *sym, bool is_funcname)
}
tnode_t *
-build_string(buffer *strg)
+build_string(buffer *lit)
{
- size_t len = strg->len;
+ size_t value_len = lit->len;
+ if (lit->data != NULL) {
+ quoted_iterator it = { .start = 0 };
+ for (value_len = 0; quoted_next(lit, &it); value_len++)
+ continue;
+ }
type_t *tp = expr_zero_alloc(sizeof(*tp), "type");
tp->t_tspec = ARRAY;
- tp->t_subt = gettyp(strg->data != NULL ? CHAR : WCHAR_TSPEC);
- tp->t_dim = (int)(len + 1);
+ tp->t_subt = gettyp(lit->data != NULL ? CHAR : WCHAR_TSPEC);
+ tp->t_dim = (int)(value_len + 1);
tnode_t *n = expr_alloc_tnode();
n->tn_op = STRING;
@@ -537,15 +542,15 @@ build_string(buffer *strg)
n->tn_lvalue = true;
n->tn_string = expr_zero_alloc(sizeof(*n->tn_string), "tnode.string");
- n->tn_string->len = len;
+ n->tn_string->len = lit->len;
- if (strg->data != NULL) {
- n->tn_string->data = expr_zero_alloc(len + 1,
+ if (lit->data != NULL) {
+ n->tn_string->data = expr_zero_alloc(lit->len + 1,
"tnode.string.data");
- (void)memcpy(n->tn_string->data, strg->data, len + 1);
- free(strg->data);
+ (void)memcpy(n->tn_string->data, lit->data, lit->len + 1);
+ free(lit->data);
}
- free(strg);
+ free(lit);
return n;
}