Author: Richard Smith
Date: 2021-05-24T16:21:03-07:00
New Revision: de6164ec4da0cfea1b0d0e472c432ea1be4d9c29
URL: https://github.com/llvm/llvm-project/commit/de6164ec4da0cfea1b0d0e472c432ea1be4d9c29 DIFF: https://github.com/llvm/llvm-project/commit/de6164ec4da0cfea1b0d0e472c432ea1be4d9c29.diff LOG: PR50456: Properly handle multiple escaped newlines in a '*/'. Added: Modified: clang/lib/Lex/Lexer.cpp clang/test/Lexer/block_cmt_end.c Removed: ################################################################################ diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 75c0fb65f5b1..d31987a432b8 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2443,56 +2443,70 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L) { assert(CurPtr[0] == '\n' || CurPtr[0] == '\r'); - // Back up off the newline. - --CurPtr; + // Position of the first trigraph in the ending sequence. + const char *TrigraphPos = 0; + // Position of the first whitespace after a '\' in the ending sequence. + const char *SpacePos = 0; - // If this is a two-character newline sequence, skip the other character. - if (CurPtr[0] == '\n' || CurPtr[0] == '\r') { - // \n\n or \r\r -> not escaped newline. - if (CurPtr[0] == CurPtr[1]) - return false; - // \n\r or \r\n -> skip the newline. + while (true) { + // Back up off the newline. --CurPtr; - } - // If we have horizontal whitespace, skip over it. We allow whitespace - // between the slash and newline. - bool HasSpace = false; - while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) { - --CurPtr; - HasSpace = true; - } + // If this is a two-character newline sequence, skip the other character. + if (CurPtr[0] == '\n' || CurPtr[0] == '\r') { + // \n\n or \r\r -> not escaped newline. + if (CurPtr[0] == CurPtr[1]) + return false; + // \n\r or \r\n -> skip the newline. + --CurPtr; + } - // If we have a slash, we know this is an escaped newline. - if (*CurPtr == '\\') { - if (CurPtr[-1] != '*') return false; - } else { - // It isn't a slash, is it the ?? / trigraph? 
- if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' || - CurPtr[-3] != '*') + // If we have horizontal whitespace, skip over it. We allow whitespace + // between the slash and newline. + while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) { + SpacePos = CurPtr; + --CurPtr; + } + + // If we have a slash, this is an escaped newline. + if (*CurPtr == '\\') { + --CurPtr; + } else if (CurPtr[0] == '/' && CurPtr[-1] == '?' && CurPtr[-2] == '?') { + // This is a trigraph encoding of a slash. + TrigraphPos = CurPtr - 2; + CurPtr -= 3; + } else { return false; + } - // This is the trigraph ending the comment. Emit a stern warning! - CurPtr -= 2; + // If the character preceding the escaped newline is a '*', then after line + // splicing we have a '*/' ending the comment. + if (*CurPtr == '*') + break; + + if (*CurPtr != '\n' && *CurPtr != '\r') + return false; + } + if (TrigraphPos) { // If no trigraphs are enabled, warn that we ignored this trigraph and // ignore this * character. if (!L->getLangOpts().Trigraphs) { if (!L->isLexingRawMode()) - L->Diag(CurPtr, diag::trigraph_ignored_block_comment); + L->Diag(TrigraphPos, diag::trigraph_ignored_block_comment); return false; } if (!L->isLexingRawMode()) - L->Diag(CurPtr, diag::trigraph_ends_block_comment); + L->Diag(TrigraphPos, diag::trigraph_ends_block_comment); } // Warn about having an escaped newline between the */ characters. if (!L->isLexingRawMode()) - L->Diag(CurPtr, diag::escaped_newline_block_comment_end); + L->Diag(CurPtr + 1, diag::escaped_newline_block_comment_end); // If there was space between the backslash and newline, warn about it. 
- if (HasSpace && !L->isLexingRawMode()) - L->Diag(CurPtr, diag::backslash_newline_space); + if (SpacePos && !L->isLexingRawMode()) + L->Diag(SpacePos, diag::backslash_newline_space); return true; } diff --git a/clang/test/Lexer/block_cmt_end.c b/clang/test/Lexer/block_cmt_end.c index 1d00137644c3..7d24817042f4 100644 --- a/clang/test/Lexer/block_cmt_end.c +++ b/clang/test/Lexer/block_cmt_end.c @@ -32,3 +32,14 @@ foo // rdar://6060752 - We should not get warnings about trigraphs in comments: // '????' /* ???? */ + +// PR50456: multiple escaped newlines in one */. +/* + *\ +??/ +??/ +\ +/ +// expected-warning@-5 {{escaped newline}} +// expected-warning@-4 {{separated by space}} +// expected-warning@-6 {{trigraph ends block comment}} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits