corentin-soriano commented on code in PR #533:
URL: https://github.com/apache/guacamole-server/pull/533#discussion_r2115853718


##########
src/terminal/terminal.c:
##########
@@ -1574,6 +1575,141 @@ int guac_terminal_send_string(guac_terminal* term, 
const char* data) {
 
 }
 
+#define IS_UTF8_START_1_BYTE(c) ((c & 0x80) == 0x00)
+#define IS_UTF8_START_2_BYTE(c) ((c & 0xe0) == 0xc0)
+#define IS_UTF8_START_3_BYTE(c) ((c & 0xf0) == 0xe0)
+#define IS_UTF8_START_4_BYTE(c) ((c & 0xf8) == 0xf0)
+#define IS_UTF8_CONTINUATION(c) ((c & 0xc0) == 0x80)
+
+int guac_terminal_send_clipboard(guac_terminal *term) {
+
+    /* Allocate a temporary buffer for filtering the clipboard contents.
+     * As we're removing characters, we know it will be at most the size
+     * of the original plus the two bracketed paste markers. */
+    char *filtered = guac_mem_alloc(term->clipboard->length +
+            strlen(GUAC_TERMINAL_BRACKETED_PASTE_START) +
+            strlen(GUAC_TERMINAL_BRACKETED_PASTE_STOP));
+    uint8_t *src_ptr = (uint8_t *)term->clipboard->buffer;
+    uint8_t *src_end = (uint8_t *)(term->clipboard->buffer + 
term->clipboard->length);
+    uint8_t *dst_ptr = (uint8_t *)filtered;
+
+    /* Keep track of exactly how much data we've sieved */
+    int filtered_len = 0;
+
+    /* Send the paste start sequence */
+    if (term->bracketed_paste_mode) {
+        size_t seq_len = strlen(GUAC_TERMINAL_BRACKETED_PASTE_START);
+        memcpy(dst_ptr, GUAC_TERMINAL_BRACKETED_PASTE_START, seq_len);
+        dst_ptr += seq_len;
+        filtered_len += seq_len;
+    }
+
+    while (src_ptr < src_end) {
+
+        /* Allow UTF-8 codepoints.
+         * A valid UTF-8 sequence is between one and four bytes in length, and
+         * we can confirm the validity by testing the start bits of each byte.
+         *
+         * A Unicode codepoint is only valid for the smallest UTF-8 sequence 
that
+         * it fits into; larger UTF-8 sequences can only contain larger 
codepoints.
+         * Therefore, some bits in the sequence are required to be used as 
part of
+         * the codepoint number.
+         *
+         * If the sequence is valid, copy it in full. */
+
+        /* UTF-8 1-byte codepoint (U+0000 to U+007F)
+         * Start bits:  0xxxxxxx */
+        if (IS_UTF8_START_1_BYTE(src_ptr[0])) {
+
+            /* Exclude Unicode C0 (U+0000 to U+001F) control characters, except
+             * for tab (U+0009), line feed (U+000A) and carriage return 
(U+000D). */
+            if (!((src_ptr[0] >= 0x00) && (src_ptr[0] < 0x20)) ||

Review Comment:
   Although the result is the same, I think `src_ptr[0] <= 0x1f` would match the
   documented range (U+0000 to U+001F) more closely than `src_ptr[0] < 0x20`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to