Hi, Nicholas Marriott wrote on Mon, Feb 25, 2019 at 10:14:03AM +0000: > Ingo Schwarze wrote:
>> During the upcoming cleanup steps, let use retain full support for >> the first (ESC-[) syntax and lets us completely delete support for >> the second and third CSI syntaxes (single-byte CSI and UTF-8 >> single-character two-byte CSI). >> >> If you are OK with that plan, i'll send diffs implementing that. > Very little, if anything at all, uses 8-bit or UTF-8 CSI, > getting rid of it is a good idea. Thank you for all your feedback. So here is a patch doing that. The central part is dropping the definitions related to the 8-bit and the UTF-8 CSI from less.h, keeping only the definitions needed for ESC-[. In the future, these forms of the CSI will be treated exactly like other control characters: e.g., encoded for display. The rest of the diff mechanically resolves the IS_CSI_START() macro, replacing it by a simple "== ESC" test. Casting is needed at none of the places: the types involved are simply: '[' int (value 0x5b < 0x7f) 037 int (value 0x1f < 0x7f) -- apply & --> ESC int (value 0x1b < 0x7f) So there is no danger in comparing ESC using == or != with any of the types char, unsigned char (which are covered by the int range during promotion) or LWCHAR == unsigned long, in which case the (positve) constant ESC gets sign extended, which is value-preserving for a positive integer. The effects of the various chunks are, in detail: * cvt.c, cvt_text(): 8-bit and UTF-8 CSI are no longer recognized as ANSI escape sequences but copied just like any other control characters. * filename.c, bin_file(): 8-bit and UTF-8 CSI are no longer skipped but counted as binary characters just like any other binary characters. (change in behaviour confirmed by testing) * line.c, pshift(): No longer treat 8-bit and UTF-8 CSI sequences specially when shifting a line left. (shifting left and right was tested) * line.c, in_ansi_esc_seq(): No longer recognize 8-bit and UTF-8 CSI sequences. This allows simplifying the loop to iterate over bytes rather than over characters, getting rif of one LWCHAR variable and one call to the pesky function step_char(-1}. (it was tested that ESC-[ still works) * line.c, store_char(): No longer recognize 8-bit and UTF-8 CSI sequences. Again, minus one LWCHAR and minus one step_char(-1). * line.c, do_append(): No longer pass 8-bit and UTF-8 CSI unencoded, encode them like any other control character. This survived light testing. All code touched will be touched again in the upcoming cleanup because all these functions still contain step_char() or get_wchar(), but this step simplifies many of the upcoming cleanup steps. Already minus six lines of code and minus seven macro calls in this patch alone. OK? Ingo Index: cvt.c =================================================================== RCS file: /cvs/src/usr.bin/less/cvt.c,v retrieving revision 1.8 diff -u -p -r1.8 cvt.c --- cvt.c 17 Sep 2016 15:06:41 -0000 1.8 +++ cvt.c 25 Feb 2019 11:25:38 -0000 @@ -77,7 +77,7 @@ cvt_text(char *odst, char *osrc, int *ch dst--; } while (dst > odst && !IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst)); - } else if ((ops & CVT_ANSI) && IS_CSI_START(ch)) { + } else if ((ops & CVT_ANSI) && ch == ESC) { /* Skip to end of ANSI escape sequence. */ src++; /* skip the CSI start char */ while (src < src_end) Index: filename.c =================================================================== RCS file: /cvs/src/usr.bin/less/filename.c,v retrieving revision 1.26 diff -u -p -r1.26 filename.c --- filename.c 29 Oct 2017 17:10:55 -0000 1.26 +++ filename.c 25 Feb 2019 11:25:38 -0000 @@ -348,7 +348,7 @@ bin_file(int f) pend = &data[n]; for (p = data; p < pend; ) { LWCHAR c = step_char(&p, +1, pend); - if (ctldisp == OPT_ONPLUS && IS_CSI_START(c)) { + if (ctldisp == OPT_ONPLUS && c == ESC) { do { c = step_char(&p, +1, pend); } while (p < pend && is_ansi_middle(c)); Index: less.h =================================================================== RCS file: /cvs/src/usr.bin/less/less.h,v retrieving revision 1.28 diff -u -p -r1.28 less.h --- less.h 30 Dec 2018 23:09:58 -0000 1.28 +++ less.h 25 Feb 2019 11:25:38 -0000 @@ -38,8 +38,6 @@ #define IS_SPACE(c) isspace((unsigned char)(c)) #define IS_DIGIT(c) isdigit((unsigned char)(c)) -#define IS_CSI_START(c) (((LWCHAR)(c)) == ESC || (((LWCHAR)(c)) == CSI)) - #ifndef TRUE #define TRUE 1 #endif @@ -151,9 +149,7 @@ struct textlist { #define AT_INDET (1 << 7) /* Indeterminate: either bold or underline */ #define CONTROL(c) ((c)&037) - #define ESC CONTROL('[') -#define CSI ((unsigned char)'\233') #define S_INTERRUPT 01 #define S_STOP 02 Index: line.c =================================================================== RCS file: /cvs/src/usr.bin/less/line.c,v retrieving revision 1.23 diff -u -p -r1.23 line.c --- line.c 24 Feb 2019 04:54:36 -0000 1.23 +++ line.c 25 Feb 2019 11:25:39 -0000 @@ -230,7 +230,7 @@ pshift(int shift) */ while (shifted <= shift && from < curr) { c = linebuf[from]; - if (ctldisp == OPT_ONPLUS && IS_CSI_START(c)) { + if (ctldisp == OPT_ONPLUS && c == ESC) { /* Keep cumulative effect. */ linebuf[to] = c; attr[to++] = attr[from++]; @@ -469,11 +469,10 @@ in_ansi_esc_seq(void) * Search backwards for either an ESC (which means we ARE in a seq); * or an end char (which means we're NOT in a seq). */ - for (p = &linebuf[curr]; p > linebuf; ) { - LWCHAR ch = step_char(&p, -1, linebuf); - if (IS_CSI_START(ch)) + for (p = linebuf + curr - 1; p >= linebuf; p--) { + if (*p == ESC) return (1); - if (!is_ansi_middle(ch)) + if (!is_ansi_middle(*p)) return (0); } return (0); @@ -533,17 +532,14 @@ store_char(LWCHAR ch, char a, char *rep, if (ctldisp == OPT_ONPLUS && in_ansi_esc_seq()) { if (!is_ansi_end(ch) && !is_ansi_middle(ch)) { /* Remove whole unrecognized sequence. */ - char *p = &linebuf[curr]; - LWCHAR bch; do { - bch = step_char(&p, -1, linebuf); - } while (p > linebuf && !IS_CSI_START(bch)); - curr = p - linebuf; + curr--; + } while (curr > 0 && linebuf[curr] != ESC); return (0); } a = AT_ANSI; /* Will force re-AT_'ing around it. */ w = 0; - } else if (ctldisp == OPT_ONPLUS && IS_CSI_START(ch)) { + } else if (ctldisp == OPT_ONPLUS && ch == ESC) { a = AT_ANSI; /* Will force re-AT_'ing around it. */ w = 0; } else { @@ -851,7 +847,7 @@ do_append(LWCHAR ch, char *rep, off_t po } else if ((!utf_mode || is_ascii_char(ch)) && control_char((char)ch)) { do_control_char: if (ctldisp == OPT_ON || - (ctldisp == OPT_ONPLUS && IS_CSI_START(ch))) { + (ctldisp == OPT_ONPLUS && ch == ESC)) { /* * Output as a normal character. */