Hi Takashi, On Fri, 19 Dec 2025, Takashi Yano wrote:
> Currently, there are four separate workarounds for pseudo console > output in pty_master_fwd_thread. Each workaround has its own 'for' > loop that iterates over the entire output buffer, which is not > efficient. This patch consolidates these loops and introduces a > single state machine to handle all worarounds at once. In addition, > the workarouds are moved into a dedicated function, > 'workarounds_for_pseudo_console_output()' to improve readability. Thank you for spending the time and effort to write this commit message. I find it quite clear. And of course: Thank you for implementing this! > Suggested-by: Johannes Schindelin <[email protected]> > Reviewed-by: > Signed-off-by: Takashi Yano <[email protected]> > --- > winsup/cygwin/fhandler/pty.cc | 283 ++++++++++++++++------------------ > 1 file changed, 129 insertions(+), 154 deletions(-) Very nice. > diff --git a/winsup/cygwin/fhandler/pty.cc b/winsup/cygwin/fhandler/pty.cc > index 32e50540e..7fa747e0a 100644 > --- a/winsup/cygwin/fhandler/pty.cc > +++ b/winsup/cygwin/fhandler/pty.cc > @@ -2642,6 +2642,134 @@ pty_master_thread (VOID *arg) > return fhandler_pty_master::pty_master_thread (&p); > } > > +static DWORD > +workarounds_for_pseudo_console_output (char *outbuf, DWORD rlen) > +{ > + int state = 0; > + int start_at = 0; > + bool is_csi = false; > + bool is_osc = false; > + int arg = 0; > + bool saw_greater_than_sign = false; I had something slightly different in mind, and it might very well be over-engineered and/or less readable: - Instead of is_csi/is_osc/saw_greater_than_sign, I thought about fanning out state, so that state = 2 means we just saw CSI, state = 3 means we saw OSC, state = 4 means we saw a number after CSI, etc. - Since those numbers would now be quite magic, I thought about using an `enum` to clarify their purposes. Having written all that, I have to admit that your variant of the patch is eminently easier to review. > + for (DWORD i=0; i<rlen; i++) > + if (state == 0 && outbuf[i] == '\033') > + { > + start_at = i; > + state = 1; > + is_csi = false; > + is_osc = false; > + arg = 0; > + continue; > + } > + else if (state == 1) > + { > + switch (outbuf[i]) > + { > + case '[': > + is_csi = true; > + state = 2; > + break; > + case ']': > + is_osc = true; > + state = 2; > + break; > + case '\033': > + start_at = i; > + state = 1; > + break; > + default: > + state = 0; > + } > + continue; > + } > + else if (is_csi) > + { > + if (state == 2 && outbuf[i] == '>') > + saw_greater_than_sign = true; > + else if (state == 2 && (isdigit (outbuf[i]) || outbuf[i] == ';')) > + continue; > + else if (state == 2) > + { The two previous `if`/`else if` both have the same `state == 2` condition and could be folded into this here conditional block. I am not quite sure, though, that that would be desirable: Treating `>` and digits and `;` visually separately from the logic that looks at the end of the ANSI sequences reduces the cognitive load, I find. All this is to say: I reviewed this patch the best way I can, and I like it! Thank you, Johannes > + if (saw_greater_than_sign && outbuf[i] == 'm') > + { > + /* Remove CSI > Pm m */ > + memmove (&outbuf[start_at], &outbuf[i+1], rlen-i-1); > + rlen = start_at + rlen - i - 1; > + i = start_at - 1; > + state = 0; > + } > + else if (wincap.has_pcon_omit_nl_before_cursor_move () > + && !saw_greater_than_sign && outbuf[i] == 'H') > + /* Workaround for rlwrap in Win11. rlwrap treats text between > + NLs as a line, however, pseudo console in Win11 somtimes > + omits NL before "CSIm;nH". This does not happen in Win10. */ > + { > + /* Add omitted CR NL before "CSIm;nH". However, when the > + cusor is on the bottom-most line, adding NL might cause > + unexpected scrolling. To avoid this, add "CSI H" before > + CR NL. */ > + const char *ins = "\033[H\r\n"; > + const int ins_len = strlen (ins); > + if (rlen + ins_len <= NT_MAX_PATH) > + { > + memmove (&outbuf[start_at + ins_len], &outbuf[start_at], > + rlen - start_at); > + memcpy (&outbuf[start_at], ins, ins_len); > + rlen += ins_len; > + i += ins_len; > + } > + } > + state = 0; > + } > + else if (outbuf[i] == '\033') > + { > + start_at = i; > + is_csi = false; > + state = 1; > + } > + else > + state = 0; > + } > + else if (is_osc) > + { > + if (state == 2 && isdigit (outbuf[i])) > + arg = arg * 10 + (outbuf[i] - '0'); > + else if (state == 2 && outbuf[i] == ';') > + state = 3; > + else if (state == 3 && outbuf[i] == '\033') > + state = 4; > + else if ((state == 3 && outbuf[i] == '\a') > + || (state == 4 && outbuf[i] == '\\')) > + { > + const char *helper_str = "\\bin\\cygwin-console-helper.exe"; > + if (outbuf[start_at + 4] == '?' /* OSC Ps; ? BEL/ST */ > + /* Stray set title at the start up of pcon */ > + || (arg == 0 && memmem (&outbuf[start_at], i + 1 - start_at, > + helper_str, strlen (helper_str)))) > + { > + /* Remove this ESC sequence */ > + memmove (&outbuf[start_at], &outbuf[i+1], rlen-i-1); > + rlen = start_at + rlen - i - 1; > + i = start_at - 1; > + } > + state = 0; > + } > + else if (state == 3) > + continue; > + else if (outbuf[i] == '\033') > + { > + start_at = i; > + is_osc = false; > + state = 1; > + } > + else > + state = 0; > + } > + else > + state = 0; /* Do not reach */ > + return rlen; > +} > + > /* The function pty_master_fwd_thread() should be static because the > instance is deleted if the master is dup()'ed and the original is > closed. In this case, dup()'ed instance still exists, therefore, > @@ -2676,160 +2804,7 @@ fhandler_pty_master::pty_master_fwd_thread (const > master_fwd_thread_param_t *p) > char *ptr = outbuf; > if (p->ttyp->pcon_activated) > { > - /* Avoid setting window title to "cygwin-console-helper.exe" */ > - int state = 0; > - int start_at = 0; > - for (DWORD i=0; i<rlen; i++) > - if (state == 0 && outbuf[i] == '\033') > - { > - start_at = i; > - state = 1; > - continue; > - } > - else if ((state == 1 && outbuf[i] == ']') || > - (state == 2 && outbuf[i] == '0') || > - (state == 3 && outbuf[i] == ';') || > - (state == 4 && outbuf[i] == '\033')) > - { > - state ++; > - continue; > - } > - else if ((state == 4 && outbuf[i] == '\a') > - || (state == 5 && outbuf[i] == '\\')) > - { > - const char *helper_str = "\\bin\\cygwin-console-helper.exe"; > - if (memmem (&outbuf[start_at], i + 1 - start_at, > - helper_str, strlen (helper_str))) > - { > - memmove (&outbuf[start_at], &outbuf[i+1], rlen-i-1); > - rlen = wlen = start_at + rlen - i - 1; > - i = start_at - 1; > - } > - state = 0; > - continue; > - } > - else if (state == 4) > - continue; > - else if (outbuf[i] == '\033') > - { > - start_at = i; > - state = 1; > - continue; > - } > - else > - { > - state = 0; > - continue; > - } > - > - /* Remove CSI > Pm m */ > - state = 0; > - for (DWORD i = 0; i < rlen; i++) > - if (outbuf[i] == '\033') > - { > - start_at = i; > - state = 1; > - continue; > - } > - else if ((state == 1 && outbuf[i] == '[') > - || (state == 2 && outbuf[i] == '>')) > - { > - state ++; > - continue; > - } > - else if (state == 3 && (isdigit (outbuf[i]) || outbuf[i] == ';')) > - continue; > - else if (state == 3 && outbuf[i] == 'm') > - { > - memmove (&outbuf[start_at], &outbuf[i+1], rlen-i-1); > - rlen = wlen = start_at + rlen - i - 1; > - state = 0; > - i = start_at - 1; > - continue; > - } > - else > - state = 0; > - > - /* Remove OSC Ps ; ? BEL/ST */ > - state = 0; > - for (DWORD i = 0; i < rlen; i++) > - if (state == 0 && outbuf[i] == '\033') > - { > - start_at = i; > - state = 1; > - continue; > - } > - else if ((state == 1 && outbuf[i] == ']') > - || (state == 2 && outbuf[i] == ';') > - || (state == 3 && outbuf[i] == '?') > - || (state == 4 && outbuf[i] == '\033')) > - { > - state ++; > - continue; > - } > - else if (state == 2 && isdigit (outbuf[i])) > - continue; > - else if ((state == 4 && outbuf[i] == '\a') > - || (state == 5 && outbuf[i] == '\\')) > - { > - memmove (&outbuf[start_at], &outbuf[i+1], rlen-i-1); > - rlen = wlen = start_at + rlen - i - 1; > - state = 0; > - i = start_at - 1; > - continue; > - } > - else if (outbuf[i] == '\033') > - { > - start_at = i; > - state = 1; > - continue; > - } > - else > - state = 0; > - > - /* Workaround for rlwrap in Win11. rlwrap treats text between > - NLs as a line, however, pseudo console in Win11 somtimes > - omits NL before "CSIm;nH". This does not happen in Win10. */ > - if (wincap.has_pcon_omit_nl_before_cursor_move ()) > - { > - state = 0; > - for (DWORD i = 0; i < rlen; i++) > - if (state == 0 && outbuf[i] == '\033') > - { > - start_at = i; > - state++; > - continue; > - } > - else if (state == 1 && outbuf[i] == '[') > - { > - state++; > - continue; > - } > - else if (state == 2 > - && (isdigit (outbuf[i]) || outbuf[i] == ';')) > - continue; > - else if (state == 2 && outbuf[i] == 'H') > - { > - /* Add omitted CR NL before "CSIm;nH". However, when the > - cusor is on the bottom-most line, adding NL might cause > - unexpected scrolling. To avoid this, add "CSI H" before > - CR NL. */ > - const char *ins = "\033[H\r\n"; > - const int ins_len = strlen (ins); > - if (rlen + ins_len <= NT_MAX_PATH) > - { > - memmove (&outbuf[start_at + ins_len], > - &outbuf[start_at], rlen - start_at); > - memcpy (&outbuf[start_at], ins, ins_len); > - rlen += ins_len; > - i += ins_len; > - } > - state = 0; > - continue; > - } > - else > - state = 0; > - } > + wlen = rlen = workarounds_for_pseudo_console_output (outbuf, rlen); > > if (p->ttyp->term_code_page != CP_UTF8) > { > -- > 2.51.0 > >
