Module Name:    src
Committed By:   rillig
Date:           Sat Feb 3 19:25:16 UTC 2024

Modified Files:
    src/usr.bin/xlint/lint1: ckgetopt.c debug.c emit1.c externs1.h init.c lex.c lint1.h tree.c

Log Message:
lint: keep strings in their source representation

This allows further analysis depending on whether individual characters
are escaped as octal, hexadecimal or not at all.

To generate a diff of this commit:
cvs rdiff -u -r1.21 -r1.22 src/usr.bin/xlint/lint1/ckgetopt.c
cvs rdiff -u -r1.69 -r1.70 src/usr.bin/xlint/lint1/debug.c
cvs rdiff -u -r1.84 -r1.85 src/usr.bin/xlint/lint1/emit1.c
cvs rdiff -u -r1.214 -r1.215 src/usr.bin/xlint/lint1/externs1.h
cvs rdiff -u -r1.257 -r1.258 src/usr.bin/xlint/lint1/init.c
cvs rdiff -u -r1.211 -r1.212 src/usr.bin/xlint/lint1/lex.c \
    src/usr.bin/xlint/lint1/lint1.h
cvs rdiff -u -r1.601 -r1.602 src/usr.bin/xlint/lint1/tree.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/usr.bin/xlint/lint1/ckgetopt.c diff -u src/usr.bin/xlint/lint1/ckgetopt.c:1.21 src/usr.bin/xlint/lint1/ckgetopt.c:1.22 --- src/usr.bin/xlint/lint1/ckgetopt.c:1.21 Sat Feb 3 12:57:12 2024 +++ src/usr.bin/xlint/lint1/ckgetopt.c Sat Feb 3 19:25:16 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: ckgetopt.c,v 1.21 2024/02/03 12:57:12 rillig Exp $ */ +/* $NetBSD: ckgetopt.c,v 1.22 2024/02/03 19:25:16 rillig Exp $ */ /*- * Copyright (c) 2021 The NetBSD Foundation, Inc. @@ -35,7 +35,7 @@ #include <sys/cdefs.h> #if defined(__RCSID) -__RCSID("$NetBSD: ckgetopt.c,v 1.21 2024/02/03 12:57:12 rillig Exp $"); +__RCSID("$NetBSD: ckgetopt.c,v 1.22 2024/02/03 19:25:16 rillig Exp $"); #endif #include <stdbool.h> @@ -100,7 +100,12 @@ is_getopt_condition(const tnode_t *tn, c && last_arg->tn_left->tn_op == ADDR && last_arg->tn_left->tn_left->tn_op == STRING && (str = last_arg->tn_left->tn_left->tn_string)->data != NULL) { - *out_options = xstrdup(str->data); + buffer buf; + buf_init(&buf); + quoted_iterator it = { .start = 0 }; + while (quoted_next(str, &it)) + buf_add_char(&buf, (char)it.value); + *out_options = buf.data; return true; } return false; Index: src/usr.bin/xlint/lint1/debug.c diff -u src/usr.bin/xlint/lint1/debug.c:1.69 src/usr.bin/xlint/lint1/debug.c:1.70 --- src/usr.bin/xlint/lint1/debug.c:1.69 Fri Feb 2 16:25:58 2024 +++ src/usr.bin/xlint/lint1/debug.c Sat Feb 3 19:25:16 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: debug.c,v 1.69 2024/02/02 16:25:58 rillig Exp $ */ +/* $NetBSD: debug.c,v 1.70 2024/02/03 19:25:16 rillig Exp $ */ /*- * Copyright (c) 2021 The NetBSD Foundation, Inc. 
@@ -35,7 +35,7 @@ #include <sys/cdefs.h> #if defined(__RCSID) -__RCSID("$NetBSD: debug.c,v 1.69 2024/02/02 16:25:58 rillig Exp $"); +__RCSID("$NetBSD: debug.c,v 1.70 2024/02/03 19:25:16 rillig Exp $"); #endif #include <stdlib.h> @@ -235,11 +235,10 @@ debug_node(const tnode_t *tn) // NOLINT( debug_printf("\n"); break; case STRING: - debug_printf(", length %zu", tn->tn_string->len); if (tn->tn_string->data != NULL) - // TODO: May contain \0 or control characters. - debug_printf(", \"%s\"", tn->tn_string->data); - debug_printf("\n"); + debug_printf(", %s\n", tn->tn_string->data); + else + debug_printf(", length %zu\n", tn->tn_string->len); break; default: debug_printf("\n"); Index: src/usr.bin/xlint/lint1/emit1.c diff -u src/usr.bin/xlint/lint1/emit1.c:1.84 src/usr.bin/xlint/lint1/emit1.c:1.85 --- src/usr.bin/xlint/lint1/emit1.c:1.84 Sat Feb 3 12:57:12 2024 +++ src/usr.bin/xlint/lint1/emit1.c Sat Feb 3 19:25:16 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: emit1.c,v 1.84 2024/02/03 12:57:12 rillig Exp $ */ +/* $NetBSD: emit1.c,v 1.85 2024/02/03 19:25:16 rillig Exp $ */ /* * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved. 
@@ -38,9 +38,11 @@ #include <sys/cdefs.h> #if defined(__RCSID) -__RCSID("$NetBSD: emit1.c,v 1.84 2024/02/03 12:57:12 rillig Exp $"); +__RCSID("$NetBSD: emit1.c,v 1.85 2024/02/03 19:25:16 rillig Exp $"); #endif +#include <stdlib.h> + #include "lint1.h" static void outtt(sym_t *, sym_t *); @@ -367,10 +369,17 @@ outcall(const tnode_t *tn, bool retval_u } else if (arg->tn_op == ADDR && arg->tn_left->tn_op == STRING && arg->tn_left->tn_string->data != NULL) { - /* constant string, write all format specifiers */ + buffer buf; + buf_init(&buf); + quoted_iterator it = { .start = 0 }; + while (quoted_next(arg->tn_left->tn_string, &it)) + buf_add_char(&buf, (char)it.value); + + /* string literal, write all format specifiers */ outchar('s'); outint(n); - outfstrg(arg->tn_left->tn_string->data); + outfstrg(buf.data); + free(buf.data); } } outchar((char)(retval_discarded ? 'd' : retval_used ? 'u' : 'i')); Index: src/usr.bin/xlint/lint1/externs1.h diff -u src/usr.bin/xlint/lint1/externs1.h:1.214 src/usr.bin/xlint/lint1/externs1.h:1.215 --- src/usr.bin/xlint/lint1/externs1.h:1.214 Sat Feb 3 12:57:12 2024 +++ src/usr.bin/xlint/lint1/externs1.h Sat Feb 3 19:25:16 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: externs1.h,v 1.214 2024/02/03 12:57:12 rillig Exp $ */ +/* $NetBSD: externs1.h,v 1.215 2024/02/03 19:25:16 rillig Exp $ */ /* * Copyright (c) 1994, 1995 Jochen Pohl @@ -396,6 +396,7 @@ void lex_comment(void); void lex_slash_slash_comment(void); void lex_unknown_character(int); int lex_input(void); +bool quoted_next(const buffer *, quoted_iterator *); /* * ckbool.c Index: src/usr.bin/xlint/lint1/init.c diff -u src/usr.bin/xlint/lint1/init.c:1.257 src/usr.bin/xlint/lint1/init.c:1.258 --- src/usr.bin/xlint/lint1/init.c:1.257 Thu Feb 1 18:37:06 2024 +++ src/usr.bin/xlint/lint1/init.c Sat Feb 3 19:25:16 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: init.c,v 1.257 2024/02/01 18:37:06 rillig Exp $ */ +/* $NetBSD: init.c,v 1.258 2024/02/03 19:25:16 rillig Exp $ */ /* * Copyright (c) 1994, 1995 Jochen Pohl @@ 
-38,7 +38,7 @@ #include <sys/cdefs.h> #if defined(__RCSID) -__RCSID("$NetBSD: init.c,v 1.257 2024/02/01 18:37:06 rillig Exp $"); +__RCSID("$NetBSD: init.c,v 1.258 2024/02/03 19:25:16 rillig Exp $"); #endif #include <stdlib.h> @@ -888,6 +888,12 @@ initialization_init_array_from_string(in return false; size_t len = tn->tn_string->len; + if (tn->tn_string->data != NULL) { + quoted_iterator it = { .start = 0 }; + for (len = 0; quoted_next(tn->tn_string, &it); len++) + continue; + } + if (!tp->t_incomplete_array && (size_t)tp->t_dim < len) { /* string literal too long (%lu) for target array (%lu) */ warning(187, (unsigned long)len, (unsigned long)tp->t_dim); Index: src/usr.bin/xlint/lint1/lex.c diff -u src/usr.bin/xlint/lint1/lex.c:1.211 src/usr.bin/xlint/lint1/lex.c:1.212 --- src/usr.bin/xlint/lint1/lex.c:1.211 Sat Feb 3 18:58:05 2024 +++ src/usr.bin/xlint/lint1/lex.c Sat Feb 3 19:25:16 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: lex.c,v 1.211 2024/02/03 18:58:05 rillig Exp $ */ +/* $NetBSD: lex.c,v 1.212 2024/02/03 19:25:16 rillig Exp $ */ /* * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved. 
@@ -38,7 +38,7 @@ #include <sys/cdefs.h> #if defined(__RCSID) -__RCSID("$NetBSD: lex.c,v 1.211 2024/02/03 18:58:05 rillig Exp $"); +__RCSID("$NetBSD: lex.c,v 1.212 2024/02/03 19:25:16 rillig Exp $"); #endif #include <ctype.h> @@ -753,191 +753,263 @@ lex_operator(int t, op_t o) return t; } -static int prev_byte = -1; - -static int -read_escaped_oct(int c, bool wide) +static buffer * +read_quoted(bool *complete, bool wide, char delim) { - int n = 3; - int value = 0; - do { - value = (value << 3) + (c - '0'); - c = read_byte(); - } while (--n > 0 && '0' <= c && c <= '7'); - prev_byte = c; - if (value > TARG_UCHAR_MAX && !wide) { - /* character escape does not fit in character */ - warning(76); - value &= CHAR_MASK; - } - return value; -} - -static int64_t -read_escaped_hex(int c, bool wide) -{ - if (!allow_c90) - /* \x undefined in traditional C */ - warning(82); - uint64_t value = 0; - uint64_t mask = value_bits(wide ? 32 : CHAR_SIZE); - int state = 0; /* 0 = no digits, 1 = OK, 2 = overflow */ - while (c = read_byte(), isxdigit(c)) { - c = isdigit(c) ? c - '0' : toupper(c) - 'A' + 10; - value = (value << 4) + c; - if (state == 2) - continue; - if ((value & ~mask) != 0) { - /* overflow in hex escape */ - warning(75); - state = 2; - } else { - state = 1; + buffer *buf = xcalloc(1, sizeof(*buf)); + buf_init(buf); + if (wide) + buf_add_char(buf, 'L'); + buf_add_char(buf, delim); + + for (;;) { + int c = read_byte(); + if (c <= 0) + break; + buf_add_char(buf, (char)c); + if (c == '\n') + break; + if (c == delim) { + *complete = true; + return buf; + } + if (c == '\\') { + c = read_byte(); + buf_add_char(buf, (char)(c <= 0 ? 
' ' : c)); + if (c <= 0) + break; } } - prev_byte = c; - if (state == 0) { - /* no hex digits follow \x */ - error(74); - } - if (state == 2) - value &= mask; - return (int64_t)value; + *complete = false; + buf_add_char(buf, delim); + return buf; } -static int64_t -read_escaped_backslash(int delim, bool wide) +bool +quoted_next(const buffer *lit, quoted_iterator *it) { - int c; + const char *s = lit->data; + size_t len = lit->len; - switch (c = read_byte()) { - case '"': - if (!allow_c90 && delim == '\'') - /* \" inside character constants undef... */ - warning(262); - return '"'; - case '\'': - return '\''; - case '?': - if (!allow_c90) - /* \? undefined in traditional C */ - warning(263); - return '?'; + *it = (quoted_iterator){ .i = it->i, .start = it->i }; + + char delim = s[s[0] == 'L' ? 1 : 0]; + + bool in_the_middle = it->i > 0; + if (it->i == 0) { + it->start = s[0] == 'L' ? 2 : 1; + it->i = it->start; + } + + for (;;) { + if (s[it->i] != delim) + break; + if (it->i + 1 == len) + return false; + it->next_literal = in_the_middle; + it->start += 2; + it->i += 2; + } + +again: + switch (s[it->i]) { case '\\': - return '\\'; + it->i++; + goto backslash; + case '\n': + it->unescaped_newline = true; + return false; + default: + it->value = (unsigned char)s[it->i++]; + return true; + } + +backslash: + it->escaped = true; + if ('0' <= s[it->i] && s[it->i] <= '7') + goto octal_escape; + switch (s[it->i++]) { + case '\n': + goto again; case 'a': - if (!allow_c90) - /* \a undefined in traditional C */ - warning(81); - return '\a'; + it->named_escape = true; + it->value = '\a'; + it->invalid_escape = !allow_c90; + return true; case 'b': - return '\b'; + it->named_escape = true; + it->value = '\b'; + return true; case 'e': - if (!allow_gcc) - break; - /* Not in the C standard yet, compilers recognize it */ - /* LINTED 79 */ - return '\e'; + it->named_escape = true; + it->value = '\033'; + it->invalid_escape = !allow_gcc; + return true; case 'f': - return '\f'; + 
it->named_escape = true; + it->value = '\f'; + return true; case 'n': - return '\n'; + it->named_escape = true; + it->value = '\n'; + return true; case 'r': - return '\r'; + it->named_escape = true; + it->value = '\r'; + return true; case 't': - return '\t'; + it->named_escape = true; + it->value = '\t'; + return true; case 'v': - if (!allow_c90) - /* \v undefined in traditional C */ - warning(264); - return '\v'; - case '8': case '9': - /* bad octal digit '%c' */ - warning(77, c); - /* FALLTHROUGH */ - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - return read_escaped_oct(c, wide); + it->named_escape = true; + it->value = '\v'; + it->invalid_escape = !allow_c90; + return true; case 'x': - return read_escaped_hex(c, wide); - case '\n': - return -3; - case EOF: - return -2; + goto hex_escape; + case '"': + it->literal_escape = true; + it->value = '"'; + it->invalid_escape = !allow_c90 && delim == '\''; + return true; + case '?': + it->literal_escape = true; + it->value = '?'; + it->invalid_escape = !allow_c90; + return true; default: - break; + it->invalid_escape = true; + /* FALLTHROUGH */ + case '\'': + case '\\': + it->literal_escape = true; + it->value = (unsigned char)s[it->i - 1]; + return true; } - if (isprint(c)) - /* dubious escape \%c */ - warning(79, c); - else - /* dubious escape \%o */ - warning(80, c); - return c; -} -/* - * Read a character which is part of a character constant or of a string - * and handle escapes. - * - * 'delim' is '\'' for character constants and '"' for string literals. - * - * Returns -1 if the end of the character constant or string is reached, - * -2 if the EOF is reached, and the character otherwise. 
- */ -static int64_t -get_escaped_char(int delim, bool wide) -{ +octal_escape: + it->octal_digits++; + it->value = s[it->i++] - '0'; + if ('0' <= s[it->i] && s[it->i] <= '7') { + it->octal_digits++; + it->value = 8 * it->value + (s[it->i++] - '0'); + if ('0' <= s[it->i] && s[it->i] <= '7') { + it->octal_digits++; + it->value = 8 * it->value + (s[it->i++] - '0'); + it->overflow = it->value > TARG_UCHAR_MAX + && s[0] != 'L'; + } + } + return true; - int64_t c = prev_byte; - if (c != -1) - prev_byte = -1; - else - c = read_byte(); +hex_escape: + for (;;) { + char ch = s[it->i]; + unsigned digit_value; + if ('0' <= ch && ch <= '9') + digit_value = ch - '0'; + else if ('A' <= ch && ch <= 'F') + digit_value = 10 + (ch - 'A'); + else if ('a' <= ch && ch <= 'f') + digit_value = 10 + (ch - 'a'); + else + break; - if (c == delim) - return -1; - switch (c) { - case '\n': - /* newline in string or char constant */ - error(254); - return -2; - case '\0': - /* syntax error '%s' */ - error(249, "EOF or null byte in literal"); - return -2; - case EOF: - return -2; - case '\\': - c = read_escaped_backslash(delim, wide); - if (c == -3) - return get_escaped_char(delim, wide); - break; - default: - if (c != ' ' && (isspace(c) || iscntrl(c))) { + it->i++; + it->value = 16 * it->value + digit_value; + uint64_t limit = s[0] == 'L' ? 
TARG_UINT_MAX : TARG_UCHAR_MAX; + if (it->value > limit) + it->overflow = true; + if (it->hex_digits < 3) + it->hex_digits++; + } + it->missing_hex_digits = it->hex_digits == 0; + return true; +} + +static void +check_quoted(const buffer *buf, bool complete, char delim) +{ + quoted_iterator it = { .start = 0 }; + while (quoted_next(buf, &it)) { + if (it.missing_hex_digits) + /* no hex digits follow \x */ + error(74); + if (it.hex_digits > 0 && !allow_c90) + /* \x undefined in traditional C */ + warning(82); + else if (!it.invalid_escape) + ; + else if (it.value == '8' || it.value == '9') + /* bad octal digit '%c' */ + warning(77, (int)it.value); + else if (it.literal_escape && it.value == '?') + /* \? undefined in traditional C */ + warning(263); + else if (it.literal_escape && it.value == '"') + /* \" inside character constants undefined in ... */ + warning(262); + else if (it.named_escape && it.value == '\a') + /* \a undefined in traditional C */ + warning(81); + else if (it.named_escape && it.value == '\v') + /* \v undefined in traditional C */ + warning(264); + else { + unsigned char ch = buf->data[it.i - 1]; + if (isprint(ch)) + /* dubious escape \%c */ + warning(79, ch); + else + /* dubious escape \%o */ + warning(80, ch); + } + if (it.overflow && it.hex_digits > 0) + /* overflow in hex escape */ + warning(75); + if (it.overflow && it.octal_digits > 0) + /* character escape does not fit in character */ + warning(76); + if (it.value < ' ' && !it.escaped && complete) /* invisible character U+%04X in %s */ - query_message(17, (unsigned int)c, delim == '"' + query_message(17, (unsigned)it.value, delim == '"' ? 
"string literal" : "character constant"); - } } - return c; + if (it.unescaped_newline) + /* newline in string or char constant */ + error(254); + if (!complete && delim == '"') + /* unterminated string constant */ + error(258); + if (!complete && delim == '\'') + /* unterminated character constant */ + error(253); +} + +static buffer * +lex_quoted(char delim, bool wide) +{ + bool complete; + buffer *buf = read_quoted(&complete, wide, delim); + check_quoted(buf, complete, delim); + return buf; } /* Called if lex found a leading "'". */ int lex_character_constant(void) { + buffer *buf = lex_quoted('\'', false); size_t n = 0; - int64_t val = 0, c; - while ((c = get_escaped_char('\'', false)) >= 0) { - val = (int64_t)((uint64_t)val << CHAR_SIZE) + c; + uint64_t val = 0; + quoted_iterator it = { .start = 0 }; + while (quoted_next(buf, &it)) { + val = (val << CHAR_SIZE) + it.value; n++; } - if (c == -2) { - /* unterminated character constant */ - error(253); - } else if (n > sizeof(int) || (n > 1 && (pflag || hflag))) { + if (n > sizeof(int) || (n > 1 && (pflag || hflag))) { /* * XXX: ^^ should rather be sizeof(TARG_INT). Luckily, * sizeof(int) is the same on all supported platforms. @@ -947,17 +1019,19 @@ lex_character_constant(void) } else if (n > 1) { /* multi-character character constant */ warning(294); - } else if (n == 0) { + } else if (n == 0 && !it.unescaped_newline) { /* empty character constant */ error(73); } - if (n == 1) - val = convert_integer(val, CHAR, CHAR_SIZE); + + int64_t cval = n == 1 + ? 
convert_integer((int64_t)val, CHAR, CHAR_SIZE) + : (int64_t)val; yylval.y_val = xcalloc(1, sizeof(*yylval.y_val)); yylval.y_val->v_tspec = INT; yylval.y_val->v_char_constant = true; - yylval.y_val->u.integer = val; + yylval.y_val->u.integer = cval; return T_CON; } @@ -968,22 +1042,20 @@ lex_character_constant(void) int lex_wide_character_constant(void) { - char buf[MB_LEN_MAX + 1]; - size_t nmax = MB_CUR_MAX; + buffer *buf = lex_quoted('\'', true); - int64_t c; - size_t n = 0; - while ((c = get_escaped_char('\'', true)) >= 0) { + static char wbuf[MB_LEN_MAX + 1]; + size_t n = 0, nmax = MB_CUR_MAX; + + quoted_iterator it = { .start = 0 }; + while (quoted_next(buf, &it)) { if (n < nmax) - buf[n] = (char)c; + wbuf[n] = (char)it.value; n++; } wchar_t wc = 0; - if (c == -2) { - /* unterminated character constant */ - error(253); - } else if (n == 0) { + if (n == 0) { /* empty character constant */ error(73); } else if (n > nmax) { @@ -991,9 +1063,9 @@ lex_wide_character_constant(void) /* too many characters in character constant */ error(71); } else { - buf[n] = '\0'; + wbuf[n] = '\0'; (void)mbtowc(NULL, NULL, 0); - if (mbtowc(&wc, buf, nmax) < 0) + if (mbtowc(&wc, wbuf, nmax) < 0) /* invalid multibyte character */ error(291); } @@ -1246,18 +1318,7 @@ clear_warn_flags(void) int lex_string(void) { - buffer *buf = xcalloc(1, sizeof(*buf)); - buf_init(buf); - - int64_t c; - while ((c = get_escaped_char('"', false)) >= 0) - buf_add_char(buf, (char)c); - if (c == -2) - /* unterminated string constant */ - error(258); - - - yylval.y_string = buf; + yylval.y_string = lex_quoted('"', false); return T_STRING; } @@ -1283,20 +1344,18 @@ wide_length(const buffer *buf) int lex_wide_string(void) { - int64_t c; + buffer *buf = lex_quoted('"', true); - buffer buf; - buf_init(&buf); - while ((c = get_escaped_char('"', true)) >= 0) - buf_add_char(&buf, (char)c); - if (c == -2) - /* unterminated string constant */ - error(258); + buffer str; + buf_init(&str); + quoted_iterator it = { 
.start = 0 }; + while (quoted_next(buf, &it)) + buf_add_char(&str, (char)it.value); - buffer *str = xcalloc(1, sizeof(*str)); - str->len = wide_length(&buf); + free(buf->data); + *buf = (buffer) { .len = wide_length(&str) }; - yylval.y_string = str; + yylval.y_string = buf; return T_STRING; } Index: src/usr.bin/xlint/lint1/lint1.h diff -u src/usr.bin/xlint/lint1/lint1.h:1.211 src/usr.bin/xlint/lint1/lint1.h:1.212 --- src/usr.bin/xlint/lint1/lint1.h:1.211 Thu Feb 1 18:37:06 2024 +++ src/usr.bin/xlint/lint1/lint1.h Sat Feb 3 19:25:16 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: lint1.h,v 1.211 2024/02/01 18:37:06 rillig Exp $ */ +/* $NetBSD: lint1.h,v 1.212 2024/02/03 19:25:16 rillig Exp $ */ /* * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved. @@ -311,9 +311,14 @@ typedef struct tnode { } tn_s; sym_t *_tn_sym; /* symbol if op == NAME */ val_t _tn_val; /* value if op == CON */ - buffer *_tn_string; /* string if op == STRING; for wide - * char strings, data is NULL but len - * is valid */ + buffer *_tn_string; /* string if op == STRING; for + * character strings, 'data' points to + * the concatenated string literals in + * source form, and 'len' is the + * length of the concatenation; for + * wide strings, 'data' is NULL and + * 'len' is the number of resulting + * characters */ } tn_u; } tnode_t; @@ -518,6 +523,22 @@ typedef enum { LC_VARARGS, } lint_comment; +typedef struct { + size_t start; + size_t i; + uint64_t value; + bool escaped; /* \n, \003, \x24 */ + bool named_escape; /* \a, \n, etc. */ + bool literal_escape; /* \?, \\, etc. 
*/ + uint8_t octal_digits; /* 1 to 3; 0 means not applicable */ + uint8_t hex_digits; /* 1 to 3; 0 means not applicable */ + bool next_literal; /* when a new string literal begins */ + bool invalid_escape; /* single-character escape, recoverable */ + bool overflow; /* for octal and hex escapes */ + bool missing_hex_digits; + bool unescaped_newline; /* stops iterating */ +} quoted_iterator; + #include "externs1.h" #define lint_assert(cond) \ Index: src/usr.bin/xlint/lint1/tree.c diff -u src/usr.bin/xlint/lint1/tree.c:1.601 src/usr.bin/xlint/lint1/tree.c:1.602 --- src/usr.bin/xlint/lint1/tree.c:1.601 Thu Feb 1 21:19:13 2024 +++ src/usr.bin/xlint/lint1/tree.c Sat Feb 3 19:25:16 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: tree.c,v 1.601 2024/02/01 21:19:13 rillig Exp $ */ +/* $NetBSD: tree.c,v 1.602 2024/02/03 19:25:16 rillig Exp $ */ /* * Copyright (c) 1994, 1995 Jochen Pohl @@ -37,7 +37,7 @@ #include <sys/cdefs.h> #if defined(__RCSID) -__RCSID("$NetBSD: tree.c,v 1.601 2024/02/01 21:19:13 rillig Exp $"); +__RCSID("$NetBSD: tree.c,v 1.602 2024/02/03 19:25:16 rillig Exp $"); #endif #include <float.h> @@ -522,14 +522,19 @@ build_name(sym_t *sym, bool is_funcname) } tnode_t * -build_string(buffer *strg) +build_string(buffer *lit) { - size_t len = strg->len; + size_t value_len = lit->len; + if (lit->data != NULL) { + quoted_iterator it = { .start = 0 }; + for (value_len = 0; quoted_next(lit, &it); value_len++) + continue; + } type_t *tp = expr_zero_alloc(sizeof(*tp), "type"); tp->t_tspec = ARRAY; - tp->t_subt = gettyp(strg->data != NULL ? CHAR : WCHAR_TSPEC); - tp->t_dim = (int)(len + 1); + tp->t_subt = gettyp(lit->data != NULL ? 
CHAR : WCHAR_TSPEC); + tp->t_dim = (int)(value_len + 1); tnode_t *n = expr_alloc_tnode(); n->tn_op = STRING; @@ -537,15 +542,15 @@ build_string(buffer *strg) n->tn_lvalue = true; n->tn_string = expr_zero_alloc(sizeof(*n->tn_string), "tnode.string"); - n->tn_string->len = len; + n->tn_string->len = lit->len; - if (strg->data != NULL) { - n->tn_string->data = expr_zero_alloc(len + 1, + if (lit->data != NULL) { + n->tn_string->data = expr_zero_alloc(lit->len + 1, "tnode.string.data"); - (void)memcpy(n->tn_string->data, strg->data, len + 1); - free(strg->data); + (void)memcpy(n->tn_string->data, lit->data, lit->len + 1); + free(lit->data); } - free(strg); + free(lit); return n; }