This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG95f50964fbf5: Implement P2361 Unevaluated string literals (authored by cor3ntin).
Changed prior to commit: https://reviews.llvm.org/D105759?vs=538073&id=538078#toc Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D105759/new/ https://reviews.llvm.org/D105759 Files: clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp clang/docs/ReleaseNotes.rst clang/include/clang/AST/Expr.h clang/include/clang/Basic/DiagnosticLexKinds.td clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Lex/LiteralSupport.h clang/include/clang/Parse/Parser.h clang/include/clang/Sema/Sema.h clang/lib/AST/Expr.cpp clang/lib/Lex/LiteralSupport.cpp clang/lib/Lex/PPMacroExpansion.cpp clang/lib/Lex/Pragma.cpp clang/lib/Parse/ParseDeclCXX.cpp clang/lib/Parse/ParseExpr.cpp clang/lib/Sema/SemaDeclCXX.cpp clang/lib/Sema/SemaExpr.cpp clang/lib/Sema/SemaExprCXX.cpp clang/lib/Sema/SemaInit.cpp clang/test/CXX/dcl.dcl/dcl.link/p2.cpp clang/test/CXX/dcl.dcl/p4-0x.cpp clang/test/FixIt/fixit-static-assert.cpp clang/test/SemaCXX/static-assert.cpp clang/www/cxx_status.html
Index: clang/www/cxx_status.html =================================================================== --- clang/www/cxx_status.html +++ clang/www/cxx_status.html @@ -115,7 +115,12 @@ <tr> <td>Unevaluated strings</td> <td><a href="https://wg21.link/P2361R6">P2361R6</a></td> - <td class="none" align="center">No</td> + <td class="partial" align="center"> + <details> + <summary>Clang 17 (Partial)</summary> + Attributes arguments don't yet parse as unevaluated string literals. + </details> + </td> </tr> <tr> <td>Add @, $, and ` to the basic character set</td> Index: clang/test/SemaCXX/static-assert.cpp =================================================================== --- clang/test/SemaCXX/static-assert.cpp +++ clang/test/SemaCXX/static-assert.cpp @@ -29,13 +29,23 @@ S<char> s1; // expected-note {{in instantiation of template class 'S<char>' requested here}} S<int> s2; -static_assert(false, L"\xFFFFFFFF"); // expected-error {{static assertion failed: L"\xFFFFFFFF"}} -static_assert(false, u"\U000317FF"); // expected-error {{static assertion failed: u"\U000317FF"}} - -static_assert(false, u8"Ω"); // expected-error {{static assertion failed: u8"\316\251"}} -static_assert(false, L"\u1234"); // expected-error {{static assertion failed: L"\x1234"}} -static_assert(false, L"\x1ff" "0\x123" "fx\xfffff" "goop"); // expected-error {{static assertion failed: L"\x1FF""0\x123""fx\xFFFFFgoop"}} - +static_assert(false, L"\xFFFFFFFF"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} \ + // expected-error {{invalid escape sequence '\xFFFFFFFF' in an unevaluated string literal}} +static_assert(false, u"\U000317FF"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +// FIXME: render this as u8"\u03A9" +static_assert(false, u8"Ω"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +static_assert(false, L"\u1234"); // expected-error {{an unevaluated string literal cannot have an encoding 
prefix}} +static_assert(false, L"\x1ff" // expected-error {{an unevaluated string literal cannot have an encoding prefix}} \ + // expected-error {{invalid escape sequence '\x1ff' in an unevaluated string literal}} + "0\x123" // expected-error {{invalid escape sequence '\x123' in an unevaluated string literal}} + "fx\xfffff" // expected-error {{invalid escape sequence '\xfffff' in an unevaluated string literal}} + "goop"); + +static_assert(false, "\'\"\?\\\a\b\f\n\r\t\v"); // expected-error {{'"?\<U+0007><U+0008>}} +static_assert(true, "\xFF"); // expected-error {{invalid escape sequence '\xFF' in an unevaluated string literal}} +static_assert(true, "\123"); // expected-error {{invalid escape sequence '\123' in an unevaluated string literal}} +static_assert(true, "\pOh no, a Pascal string!"); // expected-warning {{unknown escape sequence '\p'}} \ + // expected-error {{invalid escape sequence '\p' in an unevaluated string literal}} static_assert(false, R"(a \tb c Index: clang/test/FixIt/fixit-static-assert.cpp =================================================================== --- clang/test/FixIt/fixit-static-assert.cpp +++ clang/test/FixIt/fixit-static-assert.cpp @@ -11,8 +11,6 @@ // String literal prefixes are good. static_assert(true && R"(RawString)"); // CHECK-DAG: {[[@LINE-1]]:20-[[@LINE-1]]:22}:"," -static_assert(true && L"RawString"); -// CHECK-DAG: {[[@LINE-1]]:20-[[@LINE-1]]:22}:"," static_assert(true); // CHECK-DAG: {[[@LINE-1]]:19-[[@LINE-1]]:19}:", \"\"" Index: clang/test/CXX/dcl.dcl/p4-0x.cpp =================================================================== --- clang/test/CXX/dcl.dcl/p4-0x.cpp +++ clang/test/CXX/dcl.dcl/p4-0x.cpp @@ -18,4 +18,7 @@ static_assert(T(), ""); static_assert(U(), ""); // expected-error {{ambiguous}} -static_assert(false, L"\x14hi" "!" 
R"x(")x"); // expected-error {{static assertion failed: L"\024hi!\""}} +static_assert(false, L"\x14hi" // expected-error {{an unevaluated string literal cannot have an encoding prefix}} \ + // expected-error {{invalid escape sequence '\x14' in an unevaluated string literal}} + "!" + R"x(")x"); Index: clang/test/CXX/dcl.dcl/dcl.link/p2.cpp =================================================================== --- clang/test/CXX/dcl.dcl/dcl.link/p2.cpp +++ clang/test/CXX/dcl.dcl/dcl.link/p2.cpp @@ -8,7 +8,7 @@ extern "C" plusplus { } -extern u8"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} -extern L"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} -extern u"C++" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} -extern U"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} +extern u8"C" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +extern L"C" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +extern u"C++" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +extern U"C" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} Index: clang/lib/Sema/SemaInit.cpp =================================================================== --- clang/lib/Sema/SemaInit.cpp +++ clang/lib/Sema/SemaInit.cpp @@ -143,6 +143,9 @@ if (IsWideCharCompatible(ElemTy, Context)) return SIF_IncompatWideStringIntoWideChar; return SIF_Other; + case StringLiteral::Unevaluated: + assert(false && "Unevaluated string literal in initialization"); + break; } llvm_unreachable("missed a StringLiteral kind?"); Index: clang/lib/Sema/SemaExprCXX.cpp =================================================================== --- clang/lib/Sema/SemaExprCXX.cpp +++ 
clang/lib/Sema/SemaExprCXX.cpp @@ -4088,6 +4088,9 @@ case StringLiteral::Wide: return Context.typesAreCompatible(Context.getWideCharType(), QualType(ToPointeeType, 0)); + case StringLiteral::Unevaluated: + assert(false && "Unevaluated string literal in expression"); + break; } } } Index: clang/lib/Sema/SemaExpr.cpp =================================================================== --- clang/lib/Sema/SemaExpr.cpp +++ clang/lib/Sema/SemaExpr.cpp @@ -1921,6 +1921,30 @@ return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc); } +ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks) { + StringLiteralParser Literal(StringToks, PP, + StringLiteralEvalMethod::Unevaluated); + if (Literal.hadError) + return ExprError(); + + SmallVector<SourceLocation, 4> StringTokLocs; + for (const Token &Tok : StringToks) + StringTokLocs.push_back(Tok.getLocation()); + + StringLiteral *Lit = StringLiteral::Create( + Context, Literal.GetString(), StringLiteral::Unevaluated, false, {}, + &StringTokLocs[0], StringTokLocs.size()); + + if (!Literal.getUDSuffix().empty()) { + SourceLocation UDSuffixLoc = + getUDSuffixLoc(*this, StringTokLocs[Literal.getUDSuffixToken()], + Literal.getUDSuffixOffset()); + return ExprError(Diag(UDSuffixLoc, diag::err_invalid_string_udl)); + } + + return Lit; +} + /// ActOnStringLiteral - The specified tokens were lexed as pasted string /// fragments (e.g. "foo" "bar" L"baz"). 
The result string has to handle string /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from Index: clang/lib/Sema/SemaDeclCXX.cpp =================================================================== --- clang/lib/Sema/SemaDeclCXX.cpp +++ clang/lib/Sema/SemaDeclCXX.cpp @@ -16471,11 +16471,7 @@ Expr *LangStr, SourceLocation LBraceLoc) { StringLiteral *Lit = cast<StringLiteral>(LangStr); - if (!Lit->isOrdinary()) { - Diag(LangStr->getExprLoc(), diag::err_language_linkage_spec_not_ascii) - << LangStr->getSourceRange(); - return nullptr; - } + assert(Lit->isUnevaluated() && "Unexpected string literal kind"); StringRef Lang = Lit->getString(); LinkageSpecDecl::LanguageIDs Language; @@ -16940,10 +16936,7 @@ llvm::raw_svector_ostream Msg(MsgBuffer); if (AssertMessage) { const auto *MsgStr = cast<StringLiteral>(AssertMessage); - if (MsgStr->isOrdinary()) - Msg << MsgStr->getString(); - else - MsgStr->printPretty(Msg, nullptr, getPrintingPolicy()); + Msg << MsgStr->getString(); } Expr *InnerCond = nullptr; Index: clang/lib/Parse/ParseExpr.cpp =================================================================== --- clang/lib/Parse/ParseExpr.cpp +++ clang/lib/Parse/ParseExpr.cpp @@ -3256,6 +3256,17 @@ /// string-literal /// \verbatim ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral) { + return ParseStringLiteralExpression(AllowUserDefinedLiteral, + /*Unevaluated=*/false); +} + +ExprResult Parser::ParseUnevaluatedStringLiteralExpression() { + return ParseStringLiteralExpression(/*AllowUserDefinedLiteral=*/false, + /*Unevaluated=*/true); +} + +ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral, + bool Unevaluated) { assert(isTokenStringLiteral() && "Not a string literal!"); // String concat. 
Note that keywords like __func__ and __FUNCTION__ are not @@ -3267,6 +3278,11 @@ ConsumeStringToken(); } while (isTokenStringLiteral()); + if (Unevaluated) { + assert(!AllowUserDefinedLiteral && "UDL are always evaluated"); + return Actions.ActOnUnevaluatedStringLiteral(StringToks); + } + // Pass the set of string tokens, ready for concatenation, to the actions. return Actions.ActOnStringLiteral(StringToks, AllowUserDefinedLiteral ? getCurScope() Index: clang/lib/Parse/ParseDeclCXX.cpp =================================================================== --- clang/lib/Parse/ParseDeclCXX.cpp +++ clang/lib/Parse/ParseDeclCXX.cpp @@ -350,7 +350,7 @@ /// Decl *Parser::ParseLinkage(ParsingDeclSpec &DS, DeclaratorContext Context) { assert(isTokenStringLiteral() && "Not a string literal!"); - ExprResult Lang = ParseStringLiteralExpression(false); + ExprResult Lang = ParseUnevaluatedStringLiteralExpression(); ParseScope LinkageScope(this, Scope::DeclScope); Decl *LinkageSpec = @@ -1023,7 +1023,7 @@ return nullptr; } - AssertMessage = ParseStringLiteralExpression(); + AssertMessage = ParseUnevaluatedStringLiteralExpression(); if (AssertMessage.isInvalid()) { SkipMalformedDecl(); return nullptr; Index: clang/lib/Lex/Pragma.cpp =================================================================== --- clang/lib/Lex/Pragma.cpp +++ clang/lib/Lex/Pragma.cpp @@ -1088,7 +1088,8 @@ if (DiagName.is(tok::eod)) PP.getDiagnostics().dump(); else if (DiagName.is(tok::string_literal) && !DiagName.hasUDSuffix()) { - StringLiteralParser Literal(DiagName, PP); + StringLiteralParser Literal(DiagName, PP, + StringLiteralEvalMethod::Unevaluated); if (Literal.hadError) return; PP.getDiagnostics().dump(Literal.GetString()); Index: clang/lib/Lex/PPMacroExpansion.cpp =================================================================== --- clang/lib/Lex/PPMacroExpansion.cpp +++ clang/lib/Lex/PPMacroExpansion.cpp @@ -1869,7 +1869,8 @@ if (!Tok.isAnnotation() && Tok.getIdentifierInfo()) 
Tok.setKind(tok::identifier); else if (Tok.is(tok::string_literal) && !Tok.hasUDSuffix()) { - StringLiteralParser Literal(Tok, *this); + StringLiteralParser Literal(Tok, *this, + StringLiteralEvalMethod::Unevaluated); if (Literal.hadError) return; Index: clang/lib/Lex/LiteralSupport.cpp =================================================================== --- clang/lib/Lex/LiteralSupport.cpp +++ clang/lib/Lex/LiteralSupport.cpp @@ -87,6 +87,24 @@ MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd); } +static bool IsEscapeValidInUnevaluatedStringLiteral(char Escape) { + switch (Escape) { + case '\'': + case '"': + case '?': + case '\\': + case 'a': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': + return true; + } + return false; +} + /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in /// either a character or a string literal. static unsigned ProcessCharEscape(const char *ThisTokBegin, @@ -94,7 +112,8 @@ const char *ThisTokEnd, bool &HadError, FullSourceLoc Loc, unsigned CharWidth, DiagnosticsEngine *Diags, - const LangOptions &Features) { + const LangOptions &Features, + StringLiteralEvalMethod EvalMethod) { const char *EscapeBegin = ThisTokBuf; bool Delimited = false; bool EndDelimiterFound = false; @@ -105,6 +124,7 @@ // We know that this character can't be off the end of the buffer, because // that would have been \", which would not have been the end of string. unsigned ResultChar = *ThisTokBuf++; + char Escape = ResultChar; switch (ResultChar) { // These map to themselves. 
case '\\': case '\'': case '"': case '?': break; @@ -318,6 +338,12 @@ } } + if (EvalMethod == StringLiteralEvalMethod::Unevaluated && + !IsEscapeValidInUnevaluatedStringLiteral(Escape)) { + Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf, + diag::err_unevaluated_string_invalid_escape_sequence) + << StringRef(EscapeBegin, ThisTokBuf - EscapeBegin); + } return ResultChar; } @@ -1727,9 +1753,10 @@ } unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo()); uint64_t result = - ProcessCharEscape(TokBegin, begin, end, HadError, - FullSourceLoc(Loc,PP.getSourceManager()), - CharWidth, &PP.getDiagnostics(), PP.getLangOpts()); + ProcessCharEscape(TokBegin, begin, end, HadError, + FullSourceLoc(Loc, PP.getSourceManager()), CharWidth, + &PP.getDiagnostics(), PP.getLangOpts(), + StringLiteralEvalMethod::Evaluated); *buffer_begin++ = result; } @@ -1837,13 +1864,14 @@ /// hex-digit hex-digit hex-digit hex-digit /// \endverbatim /// -StringLiteralParser:: -StringLiteralParser(ArrayRef<Token> StringToks, - Preprocessor &PP) - : SM(PP.getSourceManager()), Features(PP.getLangOpts()), - Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()), - MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), - ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { +StringLiteralParser::StringLiteralParser(ArrayRef<Token> StringToks, + Preprocessor &PP, + StringLiteralEvalMethod EvalMethod) + : SM(PP.getSourceManager()), Features(PP.getLangOpts()), + Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()), + MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), + ResultPtr(ResultBuf.data()), EvalMethod(EvalMethod), hadError(false), + Pascal(false) { init(StringToks); } @@ -1860,12 +1888,12 @@ assert(!StringToks.empty() && "expected at least one token"); MaxTokenLength = StringToks[0].getLength(); assert(StringToks[0].getLength() >= 2 && "literal token is invalid!"); - SizeBound = StringToks[0].getLength()-2; // -2 for "". 
- Kind = StringToks[0].getKind(); - + SizeBound = StringToks[0].getLength() - 2; // -2 for "". hadError = false; - // Implement Translation Phase #6: concatenation of string literals + // Determines the kind of string from the prefix + Kind = tok::string_literal; + /// (C99 5.1.1.2p1). The common case is only one string fragment. for (const Token &Tok : StringToks) { if (Tok.getLength() < 2) @@ -1882,7 +1910,11 @@ // Remember if we see any wide or utf-8/16/32 strings. // Also check for illegal concatenations. - if (Tok.isNot(Kind) && Tok.isNot(tok::string_literal)) { + if (isUnevaluated() && Tok.getKind() != tok::string_literal) { + if (Diags) + Diags->Report(Tok.getLocation(), diag::err_unevaluated_string_prefix); + hadError = true; + } else if (Tok.isNot(Kind) && Tok.isNot(tok::string_literal)) { if (isOrdinary()) { Kind = Tok.getKind(); } else { @@ -1965,13 +1997,18 @@ // result of a concatenation involving at least one user-defined-string- // literal, all the participating user-defined-string-literals shall // have the same ud-suffix. 
- if (UDSuffixBuf != UDSuffix) { + bool UnevaluatedStringHasUDL = isUnevaluated() && !UDSuffix.empty(); + if (UDSuffixBuf != UDSuffix || UnevaluatedStringHasUDL) { if (Diags) { SourceLocation TokLoc = StringToks[i].getLocation(); - Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix) - << UDSuffixBuf << UDSuffix - << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc) - << SourceRange(TokLoc, TokLoc); + if (UnevaluatedStringHasUDL) { + Diags->Report(TokLoc, diag::err_unevaluated_string_udl) + << SourceRange(TokLoc, TokLoc); + } else { + Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix) + << UDSuffixBuf << UDSuffix + << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc); + } } hadError = true; } @@ -2043,8 +2080,9 @@ ++ThisTokBuf; // skip " // Check if this is a pascal string - if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd && - ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') { + if (!isUnevaluated() && Features.PascalStrings && + ThisTokBuf + 1 != ThisTokEnd && ThisTokBuf[0] == '\\' && + ThisTokBuf[1] == 'p') { // If the \p sequence is found in the first token, we have a pascal string // Otherwise, if we already have a pascal string, ignore the first \p @@ -2080,9 +2118,9 @@ } // Otherwise, this is a non-UCN escape character. Process it. 
unsigned ResultChar = - ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError, - FullSourceLoc(StringToks[i].getLocation(), SM), - CharByteWidth*8, Diags, Features); + ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError, + FullSourceLoc(StringToks[i].getLocation(), SM), + CharByteWidth * 8, Diags, Features, EvalMethod); if (CharByteWidth == 4) { // FIXME: Make the type of the result buffer correct instead of @@ -2104,6 +2142,8 @@ } } + assert((!Pascal || !isUnevaluated()) && + "Pascal string in unevaluated context"); if (Pascal) { if (CharByteWidth == 4) { // FIXME: Make the type of the result buffer correct instead of @@ -2277,8 +2317,8 @@ ByteNo -= Len; } else { ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError, - FullSourceLoc(Tok.getLocation(), SM), - CharByteWidth*8, Diags, Features); + FullSourceLoc(Tok.getLocation(), SM), CharByteWidth * 8, + Diags, Features, StringLiteralEvalMethod::Evaluated); --ByteNo; } assert(!HadError && "This method isn't valid on erroneous strings"); Index: clang/lib/AST/Expr.cpp =================================================================== --- clang/lib/AST/Expr.cpp +++ clang/lib/AST/Expr.cpp @@ -1136,6 +1136,8 @@ case UTF32: CharByteWidth = Target.getChar32Width(); break; + case Unevaluated: + return sizeof(char); // Host; } assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple"); CharByteWidth /= 8; @@ -1149,35 +1151,45 @@ const SourceLocation *Loc, unsigned NumConcatenated) : Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary) { - assert(Ctx.getAsConstantArrayType(Ty) && - "StringLiteral must be of constant array type!"); - unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind); - unsigned ByteLength = Str.size(); - assert((ByteLength % CharByteWidth == 0) && - "The size of the data must be a multiple of CharByteWidth!"); - - // Avoid the expensive division. The compiler should be able to figure it - // out by itself. 
However as of clang 7, even with the appropriate - // llvm_unreachable added just here, it is not able to do so. - unsigned Length; - switch (CharByteWidth) { - case 1: - Length = ByteLength; - break; - case 2: - Length = ByteLength / 2; - break; - case 4: - Length = ByteLength / 4; - break; - default: - llvm_unreachable("Unsupported character width!"); - } + + unsigned Length = Str.size(); StringLiteralBits.Kind = Kind; - StringLiteralBits.CharByteWidth = CharByteWidth; - StringLiteralBits.IsPascal = Pascal; StringLiteralBits.NumConcatenated = NumConcatenated; + + if (Kind != StringKind::Unevaluated) { + assert(Ctx.getAsConstantArrayType(Ty) && + "StringLiteral must be of constant array type!"); + unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind); + unsigned ByteLength = Str.size(); + assert((ByteLength % CharByteWidth == 0) && + "The size of the data must be a multiple of CharByteWidth!"); + + // Avoid the expensive division. The compiler should be able to figure it + // out by itself. However as of clang 7, even with the appropriate + // llvm_unreachable added just here, it is not able to do so. + switch (CharByteWidth) { + case 1: + Length = ByteLength; + break; + case 2: + Length = ByteLength / 2; + break; + case 4: + Length = ByteLength / 4; + break; + default: + llvm_unreachable("Unsupported character width!"); + } + + StringLiteralBits.CharByteWidth = CharByteWidth; + StringLiteralBits.IsPascal = Pascal; + } else { + assert(!Pascal && "Can't make an unevaluated Pascal string"); + StringLiteralBits.CharByteWidth = 1; + StringLiteralBits.IsPascal = false; + } + *getTrailingObjects<unsigned>() = Length; // Initialize the trailing array of SourceLocation. @@ -1186,7 +1198,7 @@ NumConcatenated * sizeof(SourceLocation)); // Initialize the trailing array of char holding the string data. 
- std::memcpy(getTrailingObjects<char>(), Str.data(), ByteLength); + std::memcpy(getTrailingObjects<char>(), Str.data(), Str.size()); setDependence(ExprDependence::None); } @@ -1223,6 +1235,7 @@ void StringLiteral::outputString(raw_ostream &OS) const { switch (getKind()) { + case Unevaluated: case Ordinary: break; // no prefix. case Wide: OS << 'L'; break; @@ -1333,7 +1346,8 @@ const TargetInfo &Target, unsigned *StartToken, unsigned *StartTokenByteOffset) const { assert((getKind() == StringLiteral::Ordinary || - getKind() == StringLiteral::UTF8) && + getKind() == StringLiteral::UTF8 || + getKind() == StringLiteral::Unevaluated) && "Only narrow string literals are currently supported"); // Loop over all of the tokens in this string until we find the one that Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -5703,6 +5703,8 @@ ExprResult ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope = nullptr); + ExprResult ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks); + /// ControllingExprOrType is either an opaque pointer coming out of a /// ParsedType or an Expr *. 
FIXME: it'd be better to split this interface /// into two so we don't take a void *, but that's awkward because one of Index: clang/include/clang/Parse/Parser.h =================================================================== --- clang/include/clang/Parse/Parser.h +++ clang/include/clang/Parse/Parser.h @@ -1788,8 +1788,12 @@ bool IsUnevaluated); ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral = false); + ExprResult ParseUnevaluatedStringLiteralExpression(); private: + ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral, + bool Unevaluated); + ExprResult ParseExpressionWithLeadingAt(SourceLocation AtLoc); ExprResult ParseExpressionWithLeadingExtension(SourceLocation ExtLoc); Index: clang/include/clang/Lex/LiteralSupport.h =================================================================== --- clang/include/clang/Lex/LiteralSupport.h +++ clang/include/clang/Lex/LiteralSupport.h @@ -212,6 +212,11 @@ } }; +enum class StringLiteralEvalMethod { + Evaluated, + Unevaluated, +}; + /// StringLiteralParser - This decodes string escape characters and performs /// wide string analysis and Translation Phase #6 (concatenation of string /// literals) (C99 5.1.1.2p1). 
@@ -230,20 +235,23 @@ SmallString<32> UDSuffixBuf; unsigned UDSuffixToken; unsigned UDSuffixOffset; + StringLiteralEvalMethod EvalMethod; + public: - StringLiteralParser(ArrayRef<Token> StringToks, - Preprocessor &PP); - StringLiteralParser(ArrayRef<Token> StringToks, - const SourceManager &sm, const LangOptions &features, - const TargetInfo &target, + StringLiteralParser(ArrayRef<Token> StringToks, Preprocessor &PP, + StringLiteralEvalMethod StringMethod = + StringLiteralEvalMethod::Evaluated); + StringLiteralParser(ArrayRef<Token> StringToks, const SourceManager &sm, + const LangOptions &features, const TargetInfo &target, DiagnosticsEngine *diags = nullptr) - : SM(sm), Features(features), Target(target), Diags(diags), - MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), - ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { + : SM(sm), Features(features), Target(target), Diags(diags), + MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), + ResultPtr(ResultBuf.data()), + EvalMethod(StringLiteralEvalMethod::Evaluated), hadError(false), + Pascal(false) { init(StringToks); } - bool hadError; bool Pascal; @@ -269,6 +277,9 @@ bool isUTF16() const { return Kind == tok::utf16_string_literal; } bool isUTF32() const { return Kind == tok::utf32_string_literal; } bool isPascal() const { return Pascal; } + bool isUnevaluated() const { + return EvalMethod == StringLiteralEvalMethod::Unevaluated; + } StringRef getUDSuffix() const { return UDSuffixBuf; } Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -433,9 +433,6 @@ "ISO C requires a named parameter before '...'">; def err_declarator_need_ident : Error<"declarator requires an identifier">; def err_language_linkage_spec_unknown : Error<"unknown linkage language">; -def 
err_language_linkage_spec_not_ascii : Error< - "string literal in language linkage specifier cannot have an " - "encoding-prefix">; def ext_use_out_of_scope_declaration : ExtWarn< "use of out-of-scope declaration of %0%select{| whose type is not " "compatible with that of an implicit declaration}1">, Index: clang/include/clang/Basic/DiagnosticLexKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticLexKinds.td +++ clang/include/clang/Basic/DiagnosticLexKinds.td @@ -276,6 +276,13 @@ "identifier">, InGroup<ReservedUserDefinedLiteral>; def err_unsupported_string_concat : Error< "unsupported non-standard concatenation of string literals">; + +def err_unevaluated_string_prefix : Error< + "an unevaluated string literal cannot have an encoding prefix">; +def err_unevaluated_string_udl : Error< + "an unevaluated string literal cannot be a user-defined literal">; +def err_unevaluated_string_invalid_escape_sequence : Error< + "invalid escape sequence '%0' in an unevaluated string literal">; def err_string_concat_mixed_suffix : Error< "differing user-defined suffixes ('%0' and '%1') in string literal " "concatenation">; Index: clang/include/clang/AST/Expr.h =================================================================== --- clang/include/clang/AST/Expr.h +++ clang/include/clang/AST/Expr.h @@ -1804,7 +1804,7 @@ /// * An array of getByteLength() char used to store the string data. 
public: - enum StringKind { Ordinary, Wide, UTF8, UTF16, UTF32 }; + enum StringKind { Ordinary, Wide, UTF8, UTF16, UTF32, Unevaluated }; private: unsigned numTrailingObjects(OverloadToken<unsigned>) const { return 1; } @@ -1866,7 +1866,7 @@ unsigned CharByteWidth); StringRef getString() const { - assert(getCharByteWidth() == 1 && + assert((isUnevaluated() || getCharByteWidth() == 1) && "This function is used in places that assume strings use char"); return StringRef(getStrDataAsChar(), getByteLength()); } @@ -1906,6 +1906,7 @@ bool isUTF8() const { return getKind() == UTF8; } bool isUTF16() const { return getKind() == UTF16; } bool isUTF32() const { return getKind() == UTF32; } + bool isUnevaluated() const { return getKind() == Unevaluated; } bool isPascal() const { return StringLiteralBits.IsPascal; } bool containsNonAscii() const { Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -135,6 +135,8 @@ ^^^^^^^^^^^^^^^^^^^^^ - Compiler flags ``-std=c++2c`` and ``-std=gnu++2c`` have been added for experimental C++2c implementation work. - Implemented `P2738R1: constexpr cast from void* <https://wg21.link/P2738R1>`_. +- Partially implemented `P2361R6: Unevaluated strings <https://wg21.link/P2361R6>`_. + The changes to attribute declarations are not part of this release.
Resolutions to C++ Defect Reports ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp =================================================================== --- clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp +++ clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp @@ -7,9 +7,6 @@ static_assert(sizeof(a) <= 10, ""); // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use unary 'static_assert' when the string literal is an empty string [modernize-unary-static-assert] // CHECK-FIXES: {{^}} static_assert(sizeof(a) <= 10 );{{$}} - static_assert(sizeof(a) <= 12, L""); - // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use unary 'static_assert' when - // CHECK-FIXES: {{^}} static_assert(sizeof(a) <= 12 );{{$}} FOO // CHECK-FIXES: {{^}} FOO{{$}} static_assert(sizeof(a) <= 17, MSG);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits