Hi Paul, There's a piece of code in Bison that I don't understand. It's dating back to:
> commit 4517da37570b39a3d4b3f523dd373abe7c622bb0 > Author: Paul Eggert <[email protected]> > Date: Wed Dec 28 08:45:47 2005 +0000 > > +/* If BUF is null, add BUFSIZE (which in this case must be less than > + INT_MAX) to COLUMN; otherwise, add mbsnwidth (BUF, BUFSIZE, 0) to > + COLUMN. If an overflow occurs, or might occur but is undetectable, > + return INT_MAX. Assume COLUMN is nonnegative. */ > + > +static inline int > +add_column_width (int column, char const *buf, size_t bufsize) > +{ > + size_t width; > + unsigned int remaining_columns = INT_MAX - column; > + > + if (buf) > + { > + if (INT_MAX / 2 <= bufsize) This is the part I don't get. Didn't you mean the converse? if (INT_MAX <= bufsize / 2) Which would be a means to make sure that if buf is only about plain ASCII characters and is larger than INT_MAX, then it won't fit in width so have it saturate to INT_MAX. But maybe this piece of code is obsolete anyway? I mean, currently mbsnwidth returns an int anyway, and seems to deal with overflow by itself. So I guess we don't even need this precaution today. > + return INT_MAX; > + width = mbsnwidth (buf, bufsize, 0); > + } > + else > + width = bufsize; > + > + return width <= remaining_columns ? column + width : INT_MAX; > +} Anyway, the following proposal addresses some issues caught by GCC's UB sanitizer. commit adbe1d33d02b60507eea9f73751d9ff07c04bbeb Author: Akim Demaille <[email protected]> Date: Tue Mar 12 19:09:10 2019 +0100 address warnings from GCC's UB sanitizer Running with CC='gcc-mp-8 -fsanitize=undefined' revealed Undefined Behaviors. * src/state.c (errs_new): Don't call memcpy with NULL as source. * src/location.c (add_column_width): Don't assume that the column argument is nonnegative: the scanner sometimes "backtracks" (e.g., see ROLLBACK_CURRENT_TOKEN and DEPRECATED) in which case we can have negative column numbers (temporarily). Found in test 3 (Invalid inputs). diff --git a/src/location.c b/src/location.c index ab478107..e623f6e5 100644 --- a/src/location.c +++ b/src/location.c @@ -32,14 +32,12 @@ location const empty_location = EMPTY_LOCATION_INIT; /* If BUF is null, add BUFSIZE (which in this case must be less than INT_MAX) to COLUMN; otherwise, add mbsnwidth (BUF, BUFSIZE, 0) to COLUMN. If an overflow occurs, or might occur but is undetectable, - return INT_MAX. Assume COLUMN is nonnegative. */ + return INT_MAX. */ static inline int add_column_width (int column, char const *buf, size_t bufsize) { - size_t width; - unsigned remaining_columns = INT_MAX - column; - + int width; if (buf) { if (INT_MAX / 2 <= bufsize) @@ -47,9 +45,12 @@ add_column_width (int column, char const *buf, size_t bufsize) width = mbsnwidth (buf, bufsize, 0); } else - width = bufsize; - - return width <= remaining_columns ? column + width : INT_MAX; + { + if (INT_MAX <= bufsize) + return INT_MAX; + width = bufsize; + } + return column <= INT_MAX - width ? column + width : INT_MAX; } /* Set *LOC and adjust scanner cursor to account for token TOKEN of @@ -66,7 +67,7 @@ location_compute (location *loc, boundary *cur, char const *token, size_t size) loc->start = *cur; - for (p = token; p < lim; p++) + for (p = token; p < lim; ++p) switch (*p) { case '\n': diff --git a/src/state.c b/src/state.c index b8b8e2ff..58980954 100644 --- a/src/state.c +++ b/src/state.c @@ -77,7 +77,8 @@ errs_new (int num, symbol **tokens) size_t symbols_size = num * sizeof *tokens; errs *res = xmalloc (offsetof (errs, symbols) + symbols_size); res->num = num; - memcpy (res->symbols, tokens, symbols_size); + if (tokens) + memcpy (res->symbols, tokens, symbols_size); return res; }
