https://github.com/igorkudrin updated https://github.com/llvm/llvm-project/pull/70898
>From 113c03bbf773c71d329ab2afd063753365e4ac68 Mon Sep 17 00:00:00 2001 From: Igor Kudrin <ikud...@accesssoftek.com> Date: Thu, 26 Oct 2023 13:19:08 -0700 Subject: [PATCH] [YAMLParser] Unfold multi-line scalar values Long scalar values can be split into multiple lines to improve readability. The rules are described in Section 6.5. "Line Folding", https://yaml.org/spec/1.2.2/#65-line-folding. In addition, for flow scalar styles, the Spec states that "All leading and trailing white space characters on each line are excluded from the content", https://yaml.org/spec/1.2.2/#73-flow-scalar-styles. The patch implements these unfolding rules for double-quoted, single-quoted, and plain scalars. --- llvm/include/llvm/Support/YAMLParser.h | 9 +- llvm/lib/Support/YAMLParser.cpp | 364 +++++++++++++----------- llvm/test/YAMLParser/spec-09-01.test | 11 +- llvm/test/YAMLParser/spec-09-02.test | 31 +- llvm/test/YAMLParser/spec-09-03.test | 7 +- llvm/test/YAMLParser/spec-09-04.test | 3 +- llvm/test/YAMLParser/spec-09-05.test | 7 +- llvm/test/YAMLParser/spec-09-06.test | 3 +- llvm/test/YAMLParser/spec-09-07.test | 11 +- llvm/test/YAMLParser/spec-09-08.test | 15 +- llvm/test/YAMLParser/spec-09-09.test | 7 +- llvm/test/YAMLParser/spec-09-10.test | 3 +- llvm/test/YAMLParser/spec-09-11.test | 6 +- llvm/test/YAMLParser/spec-09-13.test | 11 +- llvm/test/YAMLParser/spec-09-16.test | 17 +- llvm/test/YAMLParser/spec-09-17.test | 3 +- llvm/test/YAMLParser/spec1.2-07-05.test | 8 + llvm/test/YAMLParser/spec1.2-07-06.test | 7 + llvm/test/YAMLParser/spec1.2-07-09.test | 7 + llvm/test/YAMLParser/spec1.2-07-12.test | 7 + llvm/test/YAMLParser/spec1.2-07-14.test | 23 ++ 21 files changed, 367 insertions(+), 193 deletions(-) create mode 100644 llvm/test/YAMLParser/spec1.2-07-05.test create mode 100644 llvm/test/YAMLParser/spec1.2-07-06.test create mode 100644 llvm/test/YAMLParser/spec1.2-07-09.test create mode 100644 llvm/test/YAMLParser/spec1.2-07-12.test create mode 100644 
llvm/test/YAMLParser/spec1.2-07-14.test diff --git a/llvm/include/llvm/Support/YAMLParser.h b/llvm/include/llvm/Support/YAMLParser.h index f4767641647c217..9d95a1e13a0dff4 100644 --- a/llvm/include/llvm/Support/YAMLParser.h +++ b/llvm/include/llvm/Support/YAMLParser.h @@ -240,9 +240,14 @@ class ScalarNode final : public Node { private: StringRef Value; - StringRef unescapeDoubleQuoted(StringRef UnquotedValue, - StringRef::size_type Start, + StringRef getDoubleQuotedValue(StringRef UnquotedValue, SmallVectorImpl<char> &Storage) const; + + static StringRef getSingleQuotedValue(StringRef RawValue, + SmallVectorImpl<char> &Storage); + + static StringRef getPlainValue(StringRef RawValue, + SmallVectorImpl<char> &Storage); }; /// A block scalar node is an opaque datum that can be presented as a diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp index 1422e40f91944ae..96b9aa95a96b3a6 100644 --- a/llvm/lib/Support/YAMLParser.cpp +++ b/llvm/lib/Support/YAMLParser.cpp @@ -2030,187 +2030,219 @@ bool Node::failed() const { } StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { - // TODO: Handle newlines properly. We need to remove leading whitespace. - if (Value[0] == '"') { // Double quoted. - // Pull off the leading and trailing "s. - StringRef UnquotedValue = Value.substr(1, Value.size() - 2); - // Search for characters that would require unescaping the value. 
- StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); - if (i != StringRef::npos) - return unescapeDoubleQuoted(UnquotedValue, i, Storage); + if (Value[0] == '"') + return getDoubleQuotedValue(Value, Storage); + if (Value[0] == '\'') + return getSingleQuotedValue(Value, Storage); + return getPlainValue(Value, Storage); +} + +static StringRef +parseScalarValue(StringRef UnquotedValue, SmallVectorImpl<char> &Storage, + StringRef LookupChars, + std::function<StringRef(StringRef, SmallVectorImpl<char> &)> + UnescapeCallback) { + size_t I = UnquotedValue.find_first_of(LookupChars); + if (I == StringRef::npos) return UnquotedValue; - } else if (Value[0] == '\'') { // Single quoted. - // Pull off the leading and trailing 's. - StringRef UnquotedValue = Value.substr(1, Value.size() - 2); - StringRef::size_type i = UnquotedValue.find('\''); - if (i != StringRef::npos) { - // We're going to need Storage. - Storage.clear(); - Storage.reserve(UnquotedValue.size()); - for (; i != StringRef::npos; i = UnquotedValue.find('\'')) { - StringRef Valid(UnquotedValue.begin(), i); - llvm::append_range(Storage, Valid); - Storage.push_back('\''); - UnquotedValue = UnquotedValue.substr(i + 2); - } - llvm::append_range(Storage, UnquotedValue); - return StringRef(Storage.begin(), Storage.size()); - } - return UnquotedValue; - } - // Plain. - // Trim whitespace ('b-char' and 's-white'). - // NOTE: Alternatively we could change the scanner to not include whitespace - // here in the first place. - return Value.rtrim("\x0A\x0D\x20\x09"); -} -StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue - , StringRef::size_type i - , SmallVectorImpl<char> &Storage) - const { - // Use Storage to build proper value. Storage.clear(); Storage.reserve(UnquotedValue.size()); - for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) { - // Insert all previous chars into Storage. 
- StringRef Valid(UnquotedValue.begin(), i); - llvm::append_range(Storage, Valid); - // Chop off inserted chars. - UnquotedValue = UnquotedValue.substr(i); - - assert(!UnquotedValue.empty() && "Can't be empty!"); - - // Parse escape or line break. - switch (UnquotedValue[0]) { - case '\r': - case '\n': - Storage.push_back('\n'); - if ( UnquotedValue.size() > 1 - && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) - UnquotedValue = UnquotedValue.substr(1); - UnquotedValue = UnquotedValue.substr(1); - break; - default: - if (UnquotedValue.size() == 1) { - Token T; - T.Range = StringRef(UnquotedValue.begin(), 1); - setError("Unrecognized escape code", T); - return ""; - } - UnquotedValue = UnquotedValue.substr(1); - switch (UnquotedValue[0]) { - default: { - Token T; - T.Range = StringRef(UnquotedValue.begin(), 1); - setError("Unrecognized escape code", T); - return ""; - } - case '\r': - case '\n': - // Remove the new line. - if ( UnquotedValue.size() > 1 - && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) - UnquotedValue = UnquotedValue.substr(1); - // If this was just a single byte newline, it will get skipped - // below. 
- break; - case '0': - Storage.push_back(0x00); - break; - case 'a': - Storage.push_back(0x07); - break; - case 'b': - Storage.push_back(0x08); - break; - case 't': - case 0x09: - Storage.push_back(0x09); - break; - case 'n': - Storage.push_back(0x0A); - break; - case 'v': - Storage.push_back(0x0B); - break; - case 'f': - Storage.push_back(0x0C); - break; - case 'r': - Storage.push_back(0x0D); - break; - case 'e': - Storage.push_back(0x1B); - break; + char LastNewLineAddedAs = '\0'; + for (; I != StringRef::npos; I = UnquotedValue.find_first_of(LookupChars)) { + if (UnquotedValue[I] != '\x0D' && UnquotedValue[I] != '\x0A') { + llvm::append_range(Storage, UnquotedValue.take_front(I)); + UnquotedValue = UnescapeCallback(UnquotedValue.drop_front(I), Storage); + LastNewLineAddedAs = '\0'; + continue; + } + if (size_t LastNonSWhite = UnquotedValue.find_last_not_of("\x20\x09", I); + LastNonSWhite != StringRef::npos) { + llvm::append_range(Storage, UnquotedValue.take_front(LastNonSWhite + 1)); + Storage.push_back(' '); + LastNewLineAddedAs = ' '; + } else { + // Note: we can't just check if the last character in Storage is ' ', + // '\n', or something else; that would give a wrong result for double + // quoted values containing an escaped space character before a new-line + // character. 
+ switch (LastNewLineAddedAs) { case ' ': - Storage.push_back(0x20); - break; - case '"': - Storage.push_back(0x22); - break; - case '/': - Storage.push_back(0x2F); - break; - case '\\': - Storage.push_back(0x5C); - break; - case 'N': - encodeUTF8(0x85, Storage); + assert(!Storage.empty() && Storage.back() == ' '); + Storage.back() = '\n'; + LastNewLineAddedAs = '\n'; break; - case '_': - encodeUTF8(0xA0, Storage); - break; - case 'L': - encodeUTF8(0x2028, Storage); + case '\n': + assert(!Storage.empty() && Storage.back() == '\n'); + Storage.push_back('\n'); break; - case 'P': - encodeUTF8(0x2029, Storage); + default: + Storage.push_back(' '); + LastNewLineAddedAs = ' '; break; - case 'x': { - if (UnquotedValue.size() < 3) - // TODO: Report error. - break; - unsigned int UnicodeScalarValue; - if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue)) - // TODO: Report error. - UnicodeScalarValue = 0xFFFD; - encodeUTF8(UnicodeScalarValue, Storage); - UnquotedValue = UnquotedValue.substr(2); - break; - } - case 'u': { - if (UnquotedValue.size() < 5) - // TODO: Report error. - break; - unsigned int UnicodeScalarValue; - if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue)) - // TODO: Report error. - UnicodeScalarValue = 0xFFFD; - encodeUTF8(UnicodeScalarValue, Storage); - UnquotedValue = UnquotedValue.substr(4); - break; - } - case 'U': { - if (UnquotedValue.size() < 9) - // TODO: Report error. - break; - unsigned int UnicodeScalarValue; - if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue)) - // TODO: Report error. 
- UnicodeScalarValue = 0xFFFD; - encodeUTF8(UnicodeScalarValue, Storage); - UnquotedValue = UnquotedValue.substr(8); - break; - } } - UnquotedValue = UnquotedValue.substr(1); } + // Handle Windows-style EOL + if (UnquotedValue.substr(I, 2) == "\x0D\x0A") + I++; + UnquotedValue = UnquotedValue.drop_front(I + 1).ltrim("\x20\x09"); } llvm::append_range(Storage, UnquotedValue); return StringRef(Storage.begin(), Storage.size()); } +StringRef +ScalarNode::getDoubleQuotedValue(StringRef RawValue, + SmallVectorImpl<char> &Storage) const { + assert(RawValue.size() >= 2 && RawValue.front() == '"' && + RawValue.back() == '"'); + StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2); + + auto UnescapeFunc = [this](StringRef UnquotedValue, + SmallVectorImpl<char> &Storage) { + assert(UnquotedValue.take_front(1) == "\\"); + if (UnquotedValue.size() == 1) { + Token T; + T.Range = UnquotedValue; + this->setError("Unrecognized escape code", T); + Storage.clear(); + return StringRef(); + } + UnquotedValue = UnquotedValue.drop_front(1); + switch (UnquotedValue[0]) { + default: { + Token T; + T.Range = UnquotedValue.take_front(1); + setError("Unrecognized escape code", T); + Storage.clear(); + return StringRef(); + } + case '\x0D': + // Remove the Windows-style EOL. + if (UnquotedValue.size() >= 2 && UnquotedValue[1] == '\x0A') + return UnquotedValue.drop_front(2); + // If this was just a single byte newline, it will get skipped below. + break; + case '\x0A': + // If this was just a single byte newline, it will get skipped below. 
+ break; + case '0': + Storage.push_back(0x00); + break; + case 'a': + Storage.push_back(0x07); + break; + case 'b': + Storage.push_back(0x08); + break; + case 't': + case 0x09: + Storage.push_back(0x09); + break; + case 'n': + Storage.push_back(0x0A); + break; + case 'v': + Storage.push_back(0x0B); + break; + case 'f': + Storage.push_back(0x0C); + break; + case 'r': + Storage.push_back(0x0D); + break; + case 'e': + Storage.push_back(0x1B); + break; + case ' ': + Storage.push_back(0x20); + break; + case '"': + Storage.push_back(0x22); + break; + case '/': + Storage.push_back(0x2F); + break; + case '\\': + Storage.push_back(0x5C); + break; + case 'N': + encodeUTF8(0x85, Storage); + break; + case '_': + encodeUTF8(0xA0, Storage); + break; + case 'L': + encodeUTF8(0x2028, Storage); + break; + case 'P': + encodeUTF8(0x2029, Storage); + break; + case 'x': { + if (UnquotedValue.size() < 3) + // TODO: Report error. + break; + unsigned int UnicodeScalarValue; + if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue)) + // TODO: Report error. + UnicodeScalarValue = 0xFFFD; + encodeUTF8(UnicodeScalarValue, Storage); + return UnquotedValue.drop_front(3); + } + case 'u': { + if (UnquotedValue.size() < 5) + // TODO: Report error. + break; + unsigned int UnicodeScalarValue; + if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue)) + // TODO: Report error. + UnicodeScalarValue = 0xFFFD; + encodeUTF8(UnicodeScalarValue, Storage); + return UnquotedValue.drop_front(5); + } + case 'U': { + if (UnquotedValue.size() < 9) + // TODO: Report error. + break; + unsigned int UnicodeScalarValue; + if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue)) + // TODO: Report error. 
+ UnicodeScalarValue = 0xFFFD; + encodeUTF8(UnicodeScalarValue, Storage); + return UnquotedValue.drop_front(9); + } + } + return UnquotedValue.drop_front(1); + }; + + return parseScalarValue(UnquotedValue, Storage, "\\\x0A\x0D", UnescapeFunc); +} + +StringRef ScalarNode::getSingleQuotedValue(StringRef RawValue, + SmallVectorImpl<char> &Storage) { + assert(RawValue.size() >= 2 && RawValue.front() == '\'' && + RawValue.back() == '\''); + StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2); + + auto UnescapeFunc = [](StringRef UnquotedValue, + SmallVectorImpl<char> &Storage) { + assert(UnquotedValue.take_front(2) == "''"); + Storage.push_back('\''); + return UnquotedValue.drop_front(2); + }; + + return parseScalarValue(UnquotedValue, Storage, "'\x0A\x0D", UnescapeFunc); +} + +StringRef ScalarNode::getPlainValue(StringRef RawValue, + SmallVectorImpl<char> &Storage) { + // Trim trailing whitespace ('b-char' and 's-white'). + // NOTE: Alternatively we could change the scanner to not include whitespace + // here in the first place. + RawValue = RawValue.rtrim("\x0A\x0D\x20\x09"); + return parseScalarValue(RawValue, Storage, "\x0A\x0D", nullptr); +} + Node *KeyValueNode::getKey() { if (Key) return Key; diff --git a/llvm/test/YAMLParser/spec-09-01.test b/llvm/test/YAMLParser/spec-09-01.test index 8999b4961626470..f766ee6015d91f7 100644 --- a/llvm/test/YAMLParser/spec-09-01.test +++ b/llvm/test/YAMLParser/spec-09-01.test @@ -1,4 +1,13 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: !!map { +# CHECK-NEXT: ? !!str "simple key" +# CHECK-NEXT: : !!map { +# CHECK-NEXT: ? !!str "also simple" +# CHECK-NEXT: : !!str "value", +# CHECK-NEXT: ? 
!!str "not a simple key" +# CHECK-NEXT: : !!str "any value", +# CHECK-NEXT: }, +# CHECK-NEXT: } "simple key" : { "also simple" : value, diff --git a/llvm/test/YAMLParser/spec-09-02.test b/llvm/test/YAMLParser/spec-09-02.test index 3f8e49a8bd31079..5724801ba6ec089 100644 --- a/llvm/test/YAMLParser/spec-09-02.test +++ b/llvm/test/YAMLParser/spec-09-02.test @@ -1,14 +1,29 @@ # RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s +# CHECK: "as space trimmed\nspecific\L escaped\t none" - "as space - trimmed +## Note: The example was originally taken from Spec 1.1, but the parsing rules +## have been changed since then. +## * The line-separator character '\u2028' is no longer considered a line-break +## character, so the line "...specific\u2028\nescaped..." is now parsed as +## "...specific\L escaped...". +## * The paragraph-separator character '\u2029' is also excluded from line-break +## characters, so the original sequence "escaped\t\\\u2029" is no longer +## considered valid. This is replaced by "escaped\t\\\n" in the test source, +# so the output has changed as well. +## See https://yaml.org/spec/1.2.2/ext/changes/ for details. +## +## Note 2: Different parsers handle this corner case example differently. +## * https://github.com/yaml/libyaml: +## "as space trimmed\nspecific\L\nescaped\t\nnone" +## * https://github.com/yaml/yaml-reference-parser (parser-1.2): +## "as space trimmed\nspecific\L escaped\t none" +## * https://github.com/yaml/yaml-reference-parser (parser-1.3): +## "as space trimmed\nspecific escaped\t none" - specific + "as space + trimmed + specific escaped \ + none" - -# FIXME: The string below should actually be -# "as space trimmed\nspecific\nescaped\tnone", but the parser currently has -# a bug when parsing multiline quoted strings. 
-# CHECK: !!str "as space\n trimmed\n specific\n escaped\t none" diff --git a/llvm/test/YAMLParser/spec-09-03.test b/llvm/test/YAMLParser/spec-09-03.test index 3fb0d8b184abb16..5067bf5bd740295 100644 --- a/llvm/test/YAMLParser/spec-09-03.test +++ b/llvm/test/YAMLParser/spec-09-03.test @@ -1,4 +1,9 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: !!seq [ +# CHECK-NEXT: !!str " last", +# CHECK-NEXT: !!str " last", +# CHECK-NEXT: !!str " \tfirst last", +# CHECK-NEXT: ] - " last" diff --git a/llvm/test/YAMLParser/spec-09-04.test b/llvm/test/YAMLParser/spec-09-04.test index 4178ec6befbd538..73c66e975dee948 100644 --- a/llvm/test/YAMLParser/spec-09-04.test +++ b/llvm/test/YAMLParser/spec-09-04.test @@ -1,4 +1,5 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: "first inner 1 inner 2 last" "first inner 1 diff --git a/llvm/test/YAMLParser/spec-09-05.test b/llvm/test/YAMLParser/spec-09-05.test index e482d5366235b79..e2b2b2a55db9ee9 100644 --- a/llvm/test/YAMLParser/spec-09-05.test +++ b/llvm/test/YAMLParser/spec-09-05.test @@ -1,4 +1,9 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: !!seq [ +# CHECK-NEXT: !!str "first ", +# CHECK-NEXT: !!str "first\nlast", +# CHECK-NEXT: !!str "first inner \tlast", +# CHECK-NEXT: ] - "first " diff --git a/llvm/test/YAMLParser/spec-09-06.test b/llvm/test/YAMLParser/spec-09-06.test index edc0cbba9004c4a..084cda1cd52d789 100644 --- a/llvm/test/YAMLParser/spec-09-06.test +++ b/llvm/test/YAMLParser/spec-09-06.test @@ -1,3 +1,4 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: "here's to \"quotes\"" 'here''s to "quotes"' diff --git a/llvm/test/YAMLParser/spec-09-07.test b/llvm/test/YAMLParser/spec-09-07.test index 3c010ca5b93bdd3..35171ec32ba8713 100644 --- a/llvm/test/YAMLParser/spec-09-07.test +++ b/llvm/test/YAMLParser/spec-09-07.test @@ -1,4 +1,13 @@ -# RUN: 
yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: !!map { +# CHECK-NEXT: ? !!str "simple key" +# CHECK-NEXT: : !!map { +# CHECK-NEXT: ? !!str "also simple" +# CHECK-NEXT: : !!str "value", +# CHECK-NEXT: ? !!str "not a simple key" +# CHECK-NEXT: : !!str "any value", +# CHECK-NEXT: }, +# CHECK-NEXT: } 'simple key' : { 'also simple' : value, diff --git a/llvm/test/YAMLParser/spec-09-08.test b/llvm/test/YAMLParser/spec-09-08.test index d114e58fcac15da..6cef92912bf5007 100644 --- a/llvm/test/YAMLParser/spec-09-08.test +++ b/llvm/test/YAMLParser/spec-09-08.test @@ -1,3 +1,14 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: "as space trimmed\nspecific\L none" - 'as space trimmed specific none' +## Note: The parsing rules were changed in version 1.2 and the line-separator +## character is no longer considered a line-break character. The example is +## taken from Spec 1.1 and is now parsed as "..\L .." instead of "..\L\n.." as +## in the original edition. +## See https://yaml.org/spec/1.2.2/ext/changes/ for details. 
+ +'as space +trimmed + +specific +none' diff --git a/llvm/test/YAMLParser/spec-09-09.test b/llvm/test/YAMLParser/spec-09-09.test index 2fec1b536ef1357..133387c42cb3758 100644 --- a/llvm/test/YAMLParser/spec-09-09.test +++ b/llvm/test/YAMLParser/spec-09-09.test @@ -1,4 +1,9 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: !!seq [ +# CHECK-NEXT: !!str " last", +# CHECK-NEXT: !!str " last", +# CHECK-NEXT: !!str " \tfirst last", +# CHECK-NEXT: ] - ' last' diff --git a/llvm/test/YAMLParser/spec-09-10.test b/llvm/test/YAMLParser/spec-09-10.test index faabfb06b5ec2af..697efc2f1bc9142 100644 --- a/llvm/test/YAMLParser/spec-09-10.test +++ b/llvm/test/YAMLParser/spec-09-10.test @@ -1,4 +1,5 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: "first inner last" 'first inner diff --git a/llvm/test/YAMLParser/spec-09-11.test b/llvm/test/YAMLParser/spec-09-11.test index 3f487ad6b04398d..d7f24ea83390b27 100644 --- a/llvm/test/YAMLParser/spec-09-11.test +++ b/llvm/test/YAMLParser/spec-09-11.test @@ -1,4 +1,8 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: !!seq [ +# CHECK-NEXT: !!str "first ", +# CHECK-NEXT: !!str "first\nlast", +# CHECK-NEXT: ] - 'first ' diff --git a/llvm/test/YAMLParser/spec-09-13.test b/llvm/test/YAMLParser/spec-09-13.test index d48f2d2c47ee3d5..c93abdccaefedcd 100644 --- a/llvm/test/YAMLParser/spec-09-13.test +++ b/llvm/test/YAMLParser/spec-09-13.test @@ -1,4 +1,13 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: !!map { +# CHECK-NEXT: ? !!str "simple key" +# CHECK-NEXT: : !!map { +# CHECK-NEXT: ? !!str "also simple" +# CHECK-NEXT: : !!str "value", +# CHECK-NEXT: ? 
!!str "not a simple key" +# CHECK-NEXT: : !!str "any value", +# CHECK-NEXT: }, +# CHECK-NEXT: } simple key : { also simple : value, diff --git a/llvm/test/YAMLParser/spec-09-16.test b/llvm/test/YAMLParser/spec-09-16.test index e595f47bece9d97..b38f405c8499708 100644 --- a/llvm/test/YAMLParser/spec-09-16.test +++ b/llvm/test/YAMLParser/spec-09-16.test @@ -1,5 +1,14 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: "as space trimmed\nspecific\L none" -# Tabs are confusing: -# as space/trimmed/specific/none - as space trimmed specific none +## Note: The parsing rules were changed in version 1.2 and the line-separator +## character is no longer considered a line-break character. The example is +## taken from Spec 1.1 and is now parsed as "..\L .." instead of "..\L\n.." as +## in the original edition. +## See https://yaml.org/spec/1.2.2/ext/changes/ for details. + + as space + trimmed + + specific + none diff --git a/llvm/test/YAMLParser/spec-09-17.test b/llvm/test/YAMLParser/spec-09-17.test index 1bacf4d68b1f7ab..5020cb11b2724a3 100644 --- a/llvm/test/YAMLParser/spec-09-17.test +++ b/llvm/test/YAMLParser/spec-09-17.test @@ -1,4 +1,5 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: "first line\nmore line" first line diff --git a/llvm/test/YAMLParser/spec1.2-07-05.test b/llvm/test/YAMLParser/spec1.2-07-05.test new file mode 100644 index 000000000000000..629372059b3ebce --- /dev/null +++ b/llvm/test/YAMLParser/spec1.2-07-05.test @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: "folded to a space,\nto a line feed, or \t \tnon-content" + +"folded +to a space, + +to a line feed, or \ + \ non-content" diff --git a/llvm/test/YAMLParser/spec1.2-07-06.test b/llvm/test/YAMLParser/spec1.2-07-06.test new file mode 100644 index 000000000000000..7bbe3e7d2e5cbaf --- /dev/null +++ b/llvm/test/YAMLParser/spec1.2-07-06.test @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s | 
FileCheck %s +# CHECK: " 1st non-empty\n2nd non-empty 3rd non-empty " + +" 1st non-empty + + 2nd non-empty + 3rd non-empty " diff --git a/llvm/test/YAMLParser/spec1.2-07-09.test b/llvm/test/YAMLParser/spec1.2-07-09.test new file mode 100644 index 000000000000000..c286bcb28452f37 --- /dev/null +++ b/llvm/test/YAMLParser/spec1.2-07-09.test @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: " 1st non-empty\n2nd non-empty 3rd non-empty " + +' 1st non-empty + + 2nd non-empty + 3rd non-empty ' diff --git a/llvm/test/YAMLParser/spec1.2-07-12.test b/llvm/test/YAMLParser/spec1.2-07-12.test new file mode 100644 index 000000000000000..0d4eed4b9a5c66a --- /dev/null +++ b/llvm/test/YAMLParser/spec1.2-07-12.test @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s | FileCheck %s +# CHECK: "1st non-empty\n2nd non-empty 3rd non-empty" + +1st non-empty + + 2nd non-empty + 3rd non-empty diff --git a/llvm/test/YAMLParser/spec1.2-07-14.test b/llvm/test/YAMLParser/spec1.2-07-14.test new file mode 100644 index 000000000000000..908b7562ba8109d --- /dev/null +++ b/llvm/test/YAMLParser/spec1.2-07-14.test @@ -0,0 +1,23 @@ +# RUN: yaml-bench -canonical %s | FileCheck %s + +# CHECK: !!seq [ +# CHECK-NEXT: !!str "double quoted", +# CHECK-NEXT: !!str "single quoted", +# CHECK-NEXT: !!str "plain text", +# CHECK-NEXT: !!seq [ +# CHECK-NEXT: !!str "nested", +# CHECK-NEXT: ], +# CHECK-NEXT: !!map { +# CHECK-NEXT: ? !!str "single" +# CHECK-NEXT: : !!str "pair", +# CHECK-NEXT: }, +# CHECK-NEXT: ] + +[ +"double + quoted", 'single + quoted', +plain + text, [ nested ], +single: pair, +] _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits