http://git-wip-us.apache.org/repos/asf/marmotta/blob/0eb556da/libraries/ostrich/backend/3rdparty/abseil/absl/strings/ascii_test.cc ---------------------------------------------------------------------- diff --git a/libraries/ostrich/backend/3rdparty/abseil/absl/strings/ascii_test.cc b/libraries/ostrich/backend/3rdparty/abseil/absl/strings/ascii_test.cc new file mode 100644 index 0000000..97f3601 --- /dev/null +++ b/libraries/ostrich/backend/3rdparty/abseil/absl/strings/ascii_test.cc @@ -0,0 +1,354 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/ascii.h" + +#include <cctype> +#include <clocale> +#include <cstring> +#include <string> + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" + +namespace { + +TEST(AsciiIsFoo, All) { + for (int i = 0; i < 256; i++) { + if ((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z')) + EXPECT_TRUE(absl::ascii_isalpha(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isalpha(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if ((i >= '0' && i <= '9')) + EXPECT_TRUE(absl::ascii_isdigit(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isdigit(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (absl::ascii_isalpha(i) || absl::ascii_isdigit(i)) + EXPECT_TRUE(absl::ascii_isalnum(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isalnum(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i != '\0' && strchr(" \r\n\t\v\f", i)) + EXPECT_TRUE(absl::ascii_isspace(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isspace(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i >= 32 && i < 127) + EXPECT_TRUE(absl::ascii_isprint(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isprint(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (absl::ascii_isprint(i) && !absl::ascii_isspace(i) && + !absl::ascii_isalnum(i)) + EXPECT_TRUE(absl::ascii_ispunct(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_ispunct(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i == ' ' || i == '\t') + EXPECT_TRUE(absl::ascii_isblank(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isblank(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i < 32 || i == 127) + EXPECT_TRUE(absl::ascii_iscntrl(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_iscntrl(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if 
(absl::ascii_isdigit(i) || (i >= 'A' && i <= 'F') || + (i >= 'a' && i <= 'f')) + EXPECT_TRUE(absl::ascii_isxdigit(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isxdigit(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i > 32 && i < 127) + EXPECT_TRUE(absl::ascii_isgraph(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isgraph(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i >= 'A' && i <= 'Z') + EXPECT_TRUE(absl::ascii_isupper(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isupper(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i >= 'a' && i <= 'z') + EXPECT_TRUE(absl::ascii_islower(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_islower(i)) << ": failed on " << i; + } + for (int i = 0; i < 128; i++) { + EXPECT_TRUE(absl::ascii_isascii(i)) << ": failed on " << i; + } + for (int i = 128; i < 256; i++) { + EXPECT_TRUE(!absl::ascii_isascii(i)) << ": failed on " << i; + } + + // The official is* functions don't accept negative signed chars, but + // our absl::ascii_is* functions do. 
+ for (int i = 0; i < 256; i++) { + signed char sc = static_cast<signed char>(static_cast<unsigned char>(i)); + EXPECT_EQ(absl::ascii_isalpha(i), absl::ascii_isalpha(sc)) << i; + EXPECT_EQ(absl::ascii_isdigit(i), absl::ascii_isdigit(sc)) << i; + EXPECT_EQ(absl::ascii_isalnum(i), absl::ascii_isalnum(sc)) << i; + EXPECT_EQ(absl::ascii_isspace(i), absl::ascii_isspace(sc)) << i; + EXPECT_EQ(absl::ascii_ispunct(i), absl::ascii_ispunct(sc)) << i; + EXPECT_EQ(absl::ascii_isblank(i), absl::ascii_isblank(sc)) << i; + EXPECT_EQ(absl::ascii_iscntrl(i), absl::ascii_iscntrl(sc)) << i; + EXPECT_EQ(absl::ascii_isxdigit(i), absl::ascii_isxdigit(sc)) << i; + EXPECT_EQ(absl::ascii_isprint(i), absl::ascii_isprint(sc)) << i; + EXPECT_EQ(absl::ascii_isgraph(i), absl::ascii_isgraph(sc)) << i; + EXPECT_EQ(absl::ascii_isupper(i), absl::ascii_isupper(sc)) << i; + EXPECT_EQ(absl::ascii_islower(i), absl::ascii_islower(sc)) << i; + EXPECT_EQ(absl::ascii_isascii(i), absl::ascii_isascii(sc)) << i; + } +} + +// Checks that absl::ascii_isfoo returns the same value as isfoo in the C +// locale. +TEST(AsciiIsFoo, SameAsIsFoo) { + // temporarily change locale to C. 
It should already be C, but just for safety + std::string old_locale = setlocale(LC_CTYPE, nullptr); + ASSERT_TRUE(setlocale(LC_CTYPE, "C")); + + for (int i = 0; i < 256; i++) { + EXPECT_EQ(isalpha(i) != 0, absl::ascii_isalpha(i)) << i; + EXPECT_EQ(isdigit(i) != 0, absl::ascii_isdigit(i)) << i; + EXPECT_EQ(isalnum(i) != 0, absl::ascii_isalnum(i)) << i; + EXPECT_EQ(isspace(i) != 0, absl::ascii_isspace(i)) << i; + EXPECT_EQ(ispunct(i) != 0, absl::ascii_ispunct(i)) << i; + EXPECT_EQ(isblank(i) != 0, absl::ascii_isblank(i)) << i; + EXPECT_EQ(iscntrl(i) != 0, absl::ascii_iscntrl(i)) << i; + EXPECT_EQ(isxdigit(i) != 0, absl::ascii_isxdigit(i)) << i; + EXPECT_EQ(isprint(i) != 0, absl::ascii_isprint(i)) << i; + EXPECT_EQ(isgraph(i) != 0, absl::ascii_isgraph(i)) << i; + EXPECT_EQ(isupper(i) != 0, absl::ascii_isupper(i)) << i; + EXPECT_EQ(islower(i) != 0, absl::ascii_islower(i)) << i; + EXPECT_EQ(isascii(i) != 0, absl::ascii_isascii(i)) << i; + } + + // restore the old locale. + ASSERT_TRUE(setlocale(LC_CTYPE, old_locale.c_str())); +} + +TEST(AsciiToFoo, All) { + // temporarily change locale to C. It should already be C, but just for safety + std::string old_locale = setlocale(LC_CTYPE, nullptr); + ASSERT_TRUE(setlocale(LC_CTYPE, "C")); + + for (int i = 0; i < 256; i++) { + if (absl::ascii_islower(i)) + EXPECT_EQ(absl::ascii_toupper(i), 'A' + (i - 'a')) << i; + else + EXPECT_EQ(absl::ascii_toupper(i), static_cast<char>(i)) << i; + + if (absl::ascii_isupper(i)) + EXPECT_EQ(absl::ascii_tolower(i), 'a' + (i - 'A')) << i; + else + EXPECT_EQ(absl::ascii_tolower(i), static_cast<char>(i)) << i; + + // These CHECKs only hold in a C locale. + EXPECT_EQ(static_cast<char>(tolower(i)), absl::ascii_tolower(i)) << i; + EXPECT_EQ(static_cast<char>(toupper(i)), absl::ascii_toupper(i)) << i; + + // The official to* functions don't accept negative signed chars, but + // our absl::ascii_to* functions do. 
+ signed char sc = static_cast<signed char>(static_cast<unsigned char>(i)); + EXPECT_EQ(absl::ascii_tolower(i), absl::ascii_tolower(sc)) << i; + EXPECT_EQ(absl::ascii_toupper(i), absl::ascii_toupper(sc)) << i; + } + + // restore the old locale. + ASSERT_TRUE(setlocale(LC_CTYPE, old_locale.c_str())); +} + +TEST(AsciiStrTo, Lower) { + const char buf[] = "ABCDEF"; + const std::string str("GHIJKL"); + const std::string str2("MNOPQR"); + const absl::string_view sp(str2); + + EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf)); + EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str)); + EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp)); + + char mutable_buf[] = "Mutable"; + std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), + mutable_buf, absl::ascii_tolower); + EXPECT_STREQ("mutable", mutable_buf); +} + +TEST(AsciiStrTo, Upper) { + const char buf[] = "abcdef"; + const std::string str("ghijkl"); + const std::string str2("mnopqr"); + const absl::string_view sp(str2); + + EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf)); + EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str)); + EXPECT_EQ("MNOPQR", absl::AsciiStrToUpper(sp)); + + char mutable_buf[] = "Mutable"; + std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), + mutable_buf, absl::ascii_toupper); + EXPECT_STREQ("MUTABLE", mutable_buf); +} + +TEST(StripLeadingAsciiWhitespace, FromStringView) { + EXPECT_EQ(absl::string_view{}, + absl::StripLeadingAsciiWhitespace(absl::string_view{})); + EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace({"foo"})); + EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace({"\t \n\f\r\n\vfoo"})); + EXPECT_EQ("foo foo\n ", + absl::StripLeadingAsciiWhitespace({"\t \n\f\r\n\vfoo foo\n "})); + EXPECT_EQ(absl::string_view{}, absl::StripLeadingAsciiWhitespace( + {"\t \n\f\r\v\n\t \n\f\r\v\n"})); +} + +TEST(StripLeadingAsciiWhitespace, InPlace) { + std::string str; + + absl::StripLeadingAsciiWhitespace(&str); + EXPECT_EQ("", str); + + str = "foo"; + absl::StripLeadingAsciiWhitespace(&str); + 
EXPECT_EQ("foo", str); + + str = "\t \n\f\r\n\vfoo"; + absl::StripLeadingAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = "\t \n\f\r\n\vfoo foo\n "; + absl::StripLeadingAsciiWhitespace(&str); + EXPECT_EQ("foo foo\n ", str); + + str = "\t \n\f\r\v\n\t \n\f\r\v\n"; + absl::StripLeadingAsciiWhitespace(&str); + EXPECT_EQ(absl::string_view{}, str); +} + +TEST(StripTrailingAsciiWhitespace, FromStringView) { + EXPECT_EQ(absl::string_view{}, + absl::StripTrailingAsciiWhitespace(absl::string_view{})); + EXPECT_EQ("foo", absl::StripTrailingAsciiWhitespace({"foo"})); + EXPECT_EQ("foo", absl::StripTrailingAsciiWhitespace({"foo\t \n\f\r\n\v"})); + EXPECT_EQ(" \nfoo foo", + absl::StripTrailingAsciiWhitespace({" \nfoo foo\t \n\f\r\n\v"})); + EXPECT_EQ(absl::string_view{}, absl::StripTrailingAsciiWhitespace( + {"\t \n\f\r\v\n\t \n\f\r\v\n"})); +} + +TEST(StripTrailingAsciiWhitespace, InPlace) { + std::string str; + + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ("", str); + + str = "foo"; + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = "foo\t \n\f\r\n\v"; + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = " \nfoo foo\t \n\f\r\n\v"; + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ(" \nfoo foo", str); + + str = "\t \n\f\r\v\n\t \n\f\r\v\n"; + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ(absl::string_view{}, str); +} + +TEST(StripAsciiWhitespace, FromStringView) { + EXPECT_EQ(absl::string_view{}, + absl::StripAsciiWhitespace(absl::string_view{})); + EXPECT_EQ("foo", absl::StripAsciiWhitespace({"foo"})); + EXPECT_EQ("foo", + absl::StripAsciiWhitespace({"\t \n\f\r\n\vfoo\t \n\f\r\n\v"})); + EXPECT_EQ("foo foo", absl::StripAsciiWhitespace( + {"\t \n\f\r\n\vfoo foo\t \n\f\r\n\v"})); + EXPECT_EQ(absl::string_view{}, + absl::StripAsciiWhitespace({"\t \n\f\r\v\n\t \n\f\r\v\n"})); +} + +TEST(StripAsciiWhitespace, InPlace) { + std::string str; + + absl::StripAsciiWhitespace(&str); + EXPECT_EQ("", str); 
+ + str = "foo"; + absl::StripAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = "\t \n\f\r\n\vfoo\t \n\f\r\n\v"; + absl::StripAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = "\t \n\f\r\n\vfoo foo\t \n\f\r\n\v"; + absl::StripAsciiWhitespace(&str); + EXPECT_EQ("foo foo", str); + + str = "\t \n\f\r\v\n\t \n\f\r\v\n"; + absl::StripAsciiWhitespace(&str); + EXPECT_EQ(absl::string_view{}, str); +} + +TEST(RemoveExtraAsciiWhitespace, InPlace) { + const char* inputs[] = {"No extra space", + " Leading whitespace", + "Trailing whitespace ", + " Leading and trailing ", + " Whitespace \t in\v middle ", + "'Eeeeep! \n Newlines!\n", + "nospaces", + "", + "\n\t a\t\n\nb \t\n"}; + + const char* outputs[] = { + "No extra space", + "Leading whitespace", + "Trailing whitespace", + "Leading and trailing", + "Whitespace in middle", + "'Eeeeep! Newlines!", + "nospaces", + "", + "a\nb", + }; + const int NUM_TESTS = ABSL_ARRAYSIZE(inputs); + + for (int i = 0; i < NUM_TESTS; i++) { + std::string s(inputs[i]); + absl::RemoveExtraAsciiWhitespace(&s); + EXPECT_EQ(outputs[i], s); + } +} + +} // namespace
http://git-wip-us.apache.org/repos/asf/marmotta/blob/0eb556da/libraries/ostrich/backend/3rdparty/abseil/absl/strings/escaping.cc ---------------------------------------------------------------------- diff --git a/libraries/ostrich/backend/3rdparty/abseil/absl/strings/escaping.cc b/libraries/ostrich/backend/3rdparty/abseil/absl/strings/escaping.cc new file mode 100644 index 0000000..fbc9f75 --- /dev/null +++ b/libraries/ostrich/backend/3rdparty/abseil/absl/strings/escaping.cc @@ -0,0 +1,1109 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/escaping.h" + +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <limits> +#include <string> + +#include "absl/base/internal/endian.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/internal/unaligned_access.h" +#include "absl/strings/internal/char_map.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/internal/utf8.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" + +namespace absl { +namespace { + +// Digit conversion. 
+constexpr char kHexChar[] = "0123456789abcdef"; + +constexpr char kHexTable[513] = + "000102030405060708090a0b0c0d0e0f" + "101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f" + "303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f" + "505152535455565758595a5b5c5d5e5f" + "606162636465666768696a6b6c6d6e6f" + "707172737475767778797a7b7c7d7e7f" + "808182838485868788898a8b8c8d8e8f" + "909192939495969798999a9b9c9d9e9f" + "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" + "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" + "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" + "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; + +// These are used for the leave_nulls_escaped argument to CUnescapeInternal(). +constexpr bool kUnescapeNulls = false; + +inline bool is_octal_digit(char c) { return ('0' <= c) && (c <= '7'); } + +inline int hex_digit_to_int(char c) { + static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61, + "Character set must be ASCII."); + assert(absl::ascii_isxdigit(c)); + int x = static_cast<unsigned char>(c); + if (x > '9') { + x += 9; + } + return x & 0xf; +} + +inline bool IsSurrogate(char32_t c, absl::string_view src, std::string* error) { + if (c >= 0xD800 && c <= 0xDFFF) { + if (error) { + *error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\", + src); + } + return true; + } + return false; +} + +// ---------------------------------------------------------------------- +// CUnescapeInternal() +// Implements both CUnescape() and CUnescapeForNullTerminatedString(). +// +// Unescapes C escape sequences and is the reverse of CEscape(). +// +// If 'source' is valid, stores the unescaped std::string and its size in +// 'dest' and 'dest_len' respectively, and returns true. Otherwise +// returns false and optionally stores the error description in +// 'error'. Set 'error' to nullptr to disable error reporting. +// +// 'dest' should point to a buffer that is at least as big as 'source'. 
+// 'source' and 'dest' may be the same. +// +// NOTE: any changes to this function must also be reflected in the older +// UnescapeCEscapeSequences(). +// ---------------------------------------------------------------------- +bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, + char* dest, ptrdiff_t* dest_len, std::string* error) { + char* d = dest; + const char* p = source.data(); + const char* end = source.end(); + const char* last_byte = end - 1; + + // Small optimization for case where source = dest and there's no escaping + while (p == d && p < end && *p != '\\') p++, d++; + + while (p < end) { + if (*p != '\\') { + *d++ = *p++; + } else { + if (++p > last_byte) { // skip past the '\\' + if (error) *error = "String cannot end with \\"; + return false; + } + switch (*p) { + case 'a': *d++ = '\a'; break; + case 'b': *d++ = '\b'; break; + case 'f': *d++ = '\f'; break; + case 'n': *d++ = '\n'; break; + case 'r': *d++ = '\r'; break; + case 't': *d++ = '\t'; break; + case 'v': *d++ = '\v'; break; + case '\\': *d++ = '\\'; break; + case '?': *d++ = '\?'; break; // \? Who knew? 
+ case '\'': *d++ = '\''; break; + case '"': *d++ = '\"'; break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': { + // octal digit: 1 to 3 digits + const char* octal_start = p; + unsigned int ch = *p - '0'; + if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p - '0'; + if (p < last_byte && is_octal_digit(p[1])) + ch = ch * 8 + *++p - '0'; // now points at last digit + if (ch > 0xff) { + if (error) { + *error = "Value of \\" + + std::string(octal_start, p + 1 - octal_start) + + " exceeds 0xff"; + } + return false; + } + if ((ch == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + const ptrdiff_t octal_size = p + 1 - octal_start; + *d++ = '\\'; + memcpy(d, octal_start, octal_size); + d += octal_size; + break; + } + *d++ = ch; + break; + } + case 'x': + case 'X': { + if (p >= last_byte) { + if (error) *error = "String cannot end with \\x"; + return false; + } else if (!absl::ascii_isxdigit(p[1])) { + if (error) *error = "\\x cannot be followed by a non-hex digit"; + return false; + } + unsigned int ch = 0; + const char* hex_start = p; + while (p < last_byte && absl::ascii_isxdigit(p[1])) + // Arbitrarily many hex digits + ch = (ch << 4) + hex_digit_to_int(*++p); + if (ch > 0xFF) { + if (error) { + *error = "Value of \\" + std::string(hex_start, p + 1 - hex_start) + + " exceeds 0xff"; + } + return false; + } + if ((ch == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + const ptrdiff_t hex_size = p + 1 - hex_start; + *d++ = '\\'; + memcpy(d, hex_start, hex_size); + d += hex_size; + break; + } + *d++ = ch; + break; + } + case 'u': { + // \uhhhh => convert 4 hex digits to UTF-8 + char32_t rune = 0; + const char* hex_start = p; + if (p + 4 >= end) { + if (error) { + *error = "\\u must be followed by 4 hex digits: \\" + + std::string(hex_start, p + 1 - hex_start); + } + return false; + } + for (int i = 0; i < 4; ++i) { + // Look one char ahead. 
+ if (absl::ascii_isxdigit(p[1])) { + rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p. + } else { + if (error) { + *error = "\\u must be followed by 4 hex digits: \\" + + std::string(hex_start, p + 1 - hex_start); + } + return false; + } + } + if ((rune == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + *d++ = '\\'; + memcpy(d, hex_start, 5); // u0000 + d += 5; + break; + } + if (IsSurrogate(rune, absl::string_view(hex_start, 5), error)) { + return false; + } + d += strings_internal::EncodeUTF8Char(d, rune); + break; + } + case 'U': { + // \Uhhhhhhhh => convert 8 hex digits to UTF-8 + char32_t rune = 0; + const char* hex_start = p; + if (p + 8 >= end) { + if (error) { + *error = "\\U must be followed by 8 hex digits: \\" + + std::string(hex_start, p + 1 - hex_start); + } + return false; + } + for (int i = 0; i < 8; ++i) { + // Look one char ahead. + if (absl::ascii_isxdigit(p[1])) { + // Don't change rune until we're sure this + // is within the Unicode limit, but do advance p. 
+ uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p); + if (newrune > 0x10FFFF) { + if (error) { + *error = "Value of \\" + + std::string(hex_start, p + 1 - hex_start) + + " exceeds Unicode limit (0x10FFFF)"; + } + return false; + } else { + rune = newrune; + } + } else { + if (error) { + *error = "\\U must be followed by 8 hex digits: \\" + + std::string(hex_start, p + 1 - hex_start); + } + return false; + } + } + if ((rune == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + *d++ = '\\'; + memcpy(d, hex_start, 9); // U00000000 + d += 9; + break; + } + if (IsSurrogate(rune, absl::string_view(hex_start, 9), error)) { + return false; + } + d += strings_internal::EncodeUTF8Char(d, rune); + break; + } + default: { + if (error) *error = std::string("Unknown escape sequence: \\") + *p; + return false; + } + } + p++; // read past letter we escaped + } + } + *dest_len = d - dest; + return true; +} + +// ---------------------------------------------------------------------- +// CUnescapeInternal() +// +// Same as above but uses a C++ std::string for output. 'source' and 'dest' +// may be the same. +// ---------------------------------------------------------------------- +bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, + std::string* dest, std::string* error) { + strings_internal::STLStringResizeUninitialized(dest, source.size()); + + ptrdiff_t dest_size; + if (!CUnescapeInternal(source, + leave_nulls_escaped, + const_cast<char*>(dest->data()), + &dest_size, + error)) { + return false; + } + dest->erase(dest_size); + return true; +} + +// ---------------------------------------------------------------------- +// CEscape() +// CHexEscape() +// Utf8SafeCEscape() +// Utf8SafeCHexEscape() +// Escapes 'src' using C-style escape sequences. This is useful for +// preparing query flags. The 'Hex' version uses hexadecimal rather than +// octal sequences. The 'Utf8Safe' version does not touch UTF-8 bytes. 
//
//    Escaped chars: \n, \r, \t, ", ', \, and !absl::ascii_isprint().
// ----------------------------------------------------------------------
// Returns a newly built string holding the escaped form of 'src'.
//   use_hex   - emit non-printable bytes as \xNN instead of three-digit octal.
//   utf8_safe - copy bytes >= 0x80 through unchanged instead of escaping them.
std::string CEscapeInternal(absl::string_view src, bool use_hex, bool utf8_safe) {
  std::string dest;
  bool last_hex_escape = false;  // true if last output char was \xNN.

  for (unsigned char c : src) {
    bool is_hex_escape = false;
    switch (c) {
      case '\n': dest.append("\\" "n"); break;
      case '\r': dest.append("\\" "r"); break;
      case '\t': dest.append("\\" "t"); break;
      case '\"': dest.append("\\" "\""); break;
      case '\'': dest.append("\\" "'"); break;
      case '\\': dest.append("\\" "\\"); break;
      default:
        // Note that if we emit \xNN and the src character after that is a hex
        // digit then that digit must be escaped too to prevent it being
        // interpreted as part of the character code by C.
        if ((!utf8_safe || c < 0x80) &&
            (!absl::ascii_isprint(c) ||
             (last_hex_escape && absl::ascii_isxdigit(c)))) {
          if (use_hex) {
            dest.append("\\" "x");
            dest.push_back(kHexChar[c / 16]);
            dest.push_back(kHexChar[c % 16]);
            is_hex_escape = true;
          } else {
            // Three octal digits; kHexChar's first eight entries are '0'..'7'.
            dest.append("\\");
            dest.push_back(kHexChar[c / 64]);
            dest.push_back(kHexChar[(c % 64) / 8]);
            dest.push_back(kHexChar[c % 8]);
          }
        } else {
          dest.push_back(c);
          break;
        }
    }
    last_hex_escape = is_hex_escape;
  }

  return dest;
}

// Number of output characters each byte value occupies once C-escaped with
// octal sequences: 1 = copied as is, 2 = backslash plus one character,
// 4 = backslash plus three octal digits.
/* clang-format off */
constexpr char c_escaped_len[256] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4,  // \t, \n, \r
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,  // ", '
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // '0'..'9'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'A'..'O'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,  // 'P'..'Z', '\'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'a'..'o'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,  // 'p'..'z', DEL
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
};
/* clang-format on */

// Calculates the length of the C-style escaped version of 'src'.
// Assumes that non-printable characters are escaped using octal sequences, and
// that UTF-8 bytes are not handled specially.
inline size_t CEscapedLength(absl::string_view src) {
  size_t escaped_len = 0;
  for (unsigned char c : src) escaped_len += c_escaped_len[c];
  return escaped_len;
}

// Appends the C-escaped (octal) form of 'src' to '*dest'. The escaped length
// is computed first so that 'dest' is grown once and then written through a
// raw pointer.
void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) {
  size_t escaped_len = CEscapedLength(src);
  // Equal lengths mean every byte maps to itself, so nothing needs escaping.
  if (escaped_len == src.size()) {
    dest->append(src.data(), src.size());
    return;
  }

  size_t cur_dest_len = dest->size();
  strings_internal::STLStringResizeUninitialized(dest,
                                                 cur_dest_len + escaped_len);
  char* append_ptr = &(*dest)[cur_dest_len];

  for (unsigned char c : src) {
    int char_len = c_escaped_len[c];
    if (char_len == 1) {
      *append_ptr++ = c;
    } else if (char_len == 2) {
      switch (c) {
        case '\n':
          *append_ptr++ = '\\';
          *append_ptr++ = 'n';
          break;
        case '\r':
          *append_ptr++ = '\\';
          *append_ptr++ = 'r';
          break;
        case '\t':
          *append_ptr++ = '\\';
          *append_ptr++ = 't';
          break;
        case '\"':
          *append_ptr++ = '\\';
          *append_ptr++ = '\"';
          break;
        case '\'':
          *append_ptr++ = '\\';
          *append_ptr++ = '\'';
          break;
        case '\\':
          *append_ptr++ = '\\';
          *append_ptr++ = '\\';
          break;
      }
    } else {
      // Backslash followed by exactly three octal digits.
      *append_ptr++ = '\\';
      *append_ptr++ = '0' + c / 64;
      *append_ptr++ = '0' + (c % 64) / 8;
      *append_ptr++ = '0' + c % 8;
    }
  }
}

// Decodes base64 text.
//   src_param, szsrc - the encoded input and its length in bytes.
//   dest, szdest     - output buffer and its capacity; if 'dest' is null the
//                      input is only validated and the output size counted.
//   unbase64         - lookup table indexed by input byte; a negative entry
//                      marks a byte that is not a data character.
//   len              - on success receives the number of decoded bytes.
// Returns false if the input contains an illegal character, if 'dest' is too
// small, if a single 6-bit group is left over, or if the number of trailing
// pad characters is neither 0 nor the expected count.
bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
                            size_t szdest, const signed char* unbase64,
                            size_t* len) {
  static const char kPad64Equals = '=';
  static const char kPad64Dot = '.';

  size_t destidx = 0;
  int decode = 0;
  int state = 0;
  unsigned int ch = 0;
  unsigned int temp = 0;

  // If "char" is signed by default, using *src as an array index results in
  // accessing negative array elements. Treat the input as a pointer to
  // unsigned char to avoid this.
  const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param);

  // The GET_INPUT macro gets the next input character, skipping
  // over any whitespace, and stopping when we reach the end of the
  // string or when we read any non-data character.  The arguments are
  // an arbitrary identifier (used as a label for goto) and the number
  // of data bytes that must remain in the input to avoid aborting the
  // loop.
#define GET_INPUT(label, remain)                                \
  label:                                                        \
  --szsrc;                                                      \
  ch = *src++;                                                  \
  decode = unbase64[ch];                                        \
  if (decode < 0) {                                             \
    if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \
    state = 4 - remain;                                         \
    break;                                                      \
  }

  // if dest is null, we're just checking to see if it's legal input
  // rather than producing output.  (I suspect this could just be done
  // with a regexp...).  We duplicate the loop so this test can be
  // outside it instead of in every iteration.

  if (dest) {
    // This loop consumes 4 input bytes and produces 3 output bytes
    // per iteration.  We can't know at the start that there is enough
    // data left in the string for a full iteration, so the loop may
    // break out in the middle; if so 'state' will be set to the
    // number of input bytes read.

    while (szsrc >= 4) {
      // We'll start by optimistically assuming that the next four
      // bytes of the string (src[0..3]) are four good data bytes
      // (that is, no nulls, whitespace, padding chars, or illegal
      // chars).  We need to test src[0..2] for nulls individually
      // before constructing temp to preserve the property that we
      // never read past a null in the string (no matter how long
      // szsrc claims the string is).

      if (!src[0] || !src[1] || !src[2] ||
          ((temp = ((unsigned(unbase64[src[0]]) << 18) |
                    (unsigned(unbase64[src[1]]) << 12) |
                    (unsigned(unbase64[src[2]]) << 6) |
                    (unsigned(unbase64[src[3]])))) &
           0x80000000)) {
        // Iff any of those four characters was bad (null, illegal,
        // whitespace, padding), then temp's high bit will be set
        // (because unbase64[] is -1 for all bad characters).
        //
        // We'll back up and resort to the slower decoder, which knows
        // how to handle those cases.

        GET_INPUT(first, 4);
        temp = decode;
        GET_INPUT(second, 3);
        temp = (temp << 6) | decode;
        GET_INPUT(third, 2);
        temp = (temp << 6) | decode;
        GET_INPUT(fourth, 1);
        temp = (temp << 6) | decode;
      } else {
        // We really did have four good data bytes, so advance four
        // characters in the string.

        szsrc -= 4;
        src += 4;
      }

      // temp has 24 bits of input, so write that out as three bytes.

      if (destidx + 3 > szdest) return false;
      dest[destidx + 2] = temp;
      temp >>= 8;
      dest[destidx + 1] = temp;
      temp >>= 8;
      dest[destidx] = temp;
      destidx += 3;
    }
  } else {
    // Validation-only twin of the loop above: same scanning, but the decoded
    // bits are discarded and only the would-be output size is counted.
    while (szsrc >= 4) {
      if (!src[0] || !src[1] || !src[2] ||
          ((temp = ((unsigned(unbase64[src[0]]) << 18) |
                    (unsigned(unbase64[src[1]]) << 12) |
                    (unsigned(unbase64[src[2]]) << 6) |
                    (unsigned(unbase64[src[3]])))) &
           0x80000000)) {
        GET_INPUT(first_no_dest, 4);
        GET_INPUT(second_no_dest, 3);
        GET_INPUT(third_no_dest, 2);
        GET_INPUT(fourth_no_dest, 1);
      } else {
        szsrc -= 4;
        src += 4;
      }
      destidx += 3;
    }
  }

#undef GET_INPUT

  // if the loop terminated because we read a bad character, return
  // now.
  if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot &&
      !absl::ascii_isspace(ch))
    return false;

  if (ch == kPad64Equals || ch == kPad64Dot) {
    // if we stopped by hitting an '=' or '.', un-read that character -- we'll
    // look at it again when we count to check for the proper number of
    // equals signs at the end.
    ++szsrc;
    --src;
  } else {
    // This loop consumes 1 input byte per iteration.  It's used to
    // clean up the 0-3 input bytes remaining when the first, faster
    // loop finishes.  'temp' contains the data from 'state' input
    // characters read by the first loop.
    while (szsrc > 0) {
      --szsrc;
      ch = *src++;
      decode = unbase64[ch];
      if (decode < 0) {
        if (absl::ascii_isspace(ch)) {
          continue;
        } else if (ch == kPad64Equals || ch == kPad64Dot) {
          // back up one character; we'll read it again when we check
          // for the correct number of pad characters at the end.
          ++szsrc;
          --src;
          break;
        } else {
          return false;
        }
      }

      // Each input character gives us six bits of output.
      temp = (temp << 6) | decode;
      ++state;
      if (state == 4) {
        // If we've accumulated 24 bits of output, write that out as
        // three bytes.
        if (dest) {
          if (destidx + 3 > szdest) return false;
          dest[destidx + 2] = temp;
          temp >>= 8;
          dest[destidx + 1] = temp;
          temp >>= 8;
          dest[destidx] = temp;
        }
        destidx += 3;
        state = 0;
        temp = 0;
      }
    }
  }

  // Process the leftover data contained in 'temp' at the end of the input.
  int expected_equals = 0;
  switch (state) {
    case 0:
      // Nothing left over; output is a multiple of 3 bytes.
      break;

    case 1:
      // Bad input; we have 6 bits left over.
      return false;

    case 2:
      // Produce one more output byte from the 12 input bits we have left.
      if (dest) {
        if (destidx + 1 > szdest) return false;
        temp >>= 4;
        dest[destidx] = temp;
      }
      ++destidx;
      expected_equals = 2;
      break;

    case 3:
      // Produce two more output bytes from the 18 input bits we have left.
      if (dest) {
        if (destidx + 2 > szdest) return false;
        temp >>= 2;
        dest[destidx + 1] = temp;
        temp >>= 8;
        dest[destidx] = temp;
      }
      destidx += 2;
      expected_equals = 1;
      break;

    default:
      // state should have no other values at this point.
      ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d",
                   state);
  }

  // The remainder of the string should be all whitespace, mixed with
  // exactly 0 equals signs, or exactly 'expected_equals' equals
  // signs.  (Always accepting 0 equals signs is an Abseil extension
  // not covered in the RFC, as is accepting dot as the pad character.)

  int equals = 0;
  while (szsrc > 0) {
    if (*src == kPad64Equals || *src == kPad64Dot)
      ++equals;
    else if (!absl::ascii_isspace(*src))
      return false;
    --szsrc;
    ++src;
  }

  const bool ok = (equals == 0 || equals == expected_equals);
  if (ok) *len = destidx;
  return ok;
}

// The arrays below were generated by the following code
// #include <sys/time.h>
// #include <stdlib.h>
// #include <string.h>
// main()
// {
//   static const char Base64[] =
//     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
//   char* pos;
//   int idx, i, j;
//   printf(" ");
//   for (i = 0; i < 255; i += 8) {
//     for (j = i; j < i + 8; j++) {
//       pos = strchr(Base64, j);
//       if ((pos == nullptr) || (j == 0))
//         idx = -1;
//       else
//         idx = pos - Base64;
//       if (idx == -1)
//         printf(" %2d, ", idx);
//       else
//         printf(" %2d/*%c*/,", idx, j);
//     }
//     printf("\n ");
//   }
// }
//
// where the value of "Base64[]" was replaced by one of the base-64 conversion
// tables from the functions below.
+/* clang-format off */ +constexpr signed char kUnBase64[] = { + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */, + 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, + 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, + -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, + 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, + 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, + 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1, + -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, + 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, + 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, + 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1 +}; + +constexpr signed char kUnWebSafeBase64[] = { + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 62/*-*/, -1, -1, + 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, + 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, + -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, + 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, + 15/*P*/, 16/*Q*/, 
17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, + 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, 63/*_*/, + -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, + 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, + 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, + 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1 +}; +/* clang-format on */ + +size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { + // Base64 encodes three bytes of input at a time. If the input is not + // divisible by three, we pad as appropriate. + // + // (from http://tools.ietf.org/html/rfc3548) + // Special processing is performed if fewer than 24 bits are available + // at the end of the data being encoded. A full encoding quantum is + // always completed at the end of a quantity. When fewer than 24 input + // bits are available in an input group, zero bits are added (on the + // right) to form an integral number of 6-bit groups. Padding at the + // end of the data is performed using the '=' character. Since all base + // 64 input is an integral number of octets, only the following cases + // can arise: + + // Base64 encodes each three bytes of input into four bytes of output. 
+ size_t len = (input_len / 3) * 4; + + if (input_len % 3 == 0) { + // (from http://tools.ietf.org/html/rfc3548) + // (1) the final quantum of encoding input is an integral multiple of 24 + // bits; here, the final unit of encoded output will be an integral + // multiple of 4 characters with no "=" padding, + } else if (input_len % 3 == 1) { + // (from http://tools.ietf.org/html/rfc3548) + // (2) the final quantum of encoding input is exactly 8 bits; here, the + // final unit of encoded output will be two characters followed by two + // "=" padding characters, or + len += 2; + if (do_padding) { + len += 2; + } + } else { // (input_len % 3 == 2) + // (from http://tools.ietf.org/html/rfc3548) + // (3) the final quantum of encoding input is exactly 16 bits; here, the + // final unit of encoded output will be three characters followed by one + // "=" padding character. + len += 3; + if (do_padding) { + len += 1; + } + } + + assert(len >= input_len); // make sure we didn't overflow + return len; +} + +size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, + size_t szdest, const char* base64, + bool do_padding) { + static const char kPad64 = '='; + + if (szsrc * 4 > szdest * 3) return 0; + + char* cur_dest = dest; + const unsigned char* cur_src = src; + + char* const limit_dest = dest + szdest; + const unsigned char* const limit_src = src + szsrc; + + // Three bytes of data encodes to four characters of cyphertext. + // So we can pump through three-byte chunks atomically. + if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3 + while (cur_src < limit_src - 3) { // as long as we have >= 32 bits + uint32_t in = absl::big_endian::Load32(cur_src) >> 8; + + cur_dest[0] = base64[in >> 18]; + in &= 0x3FFFF; + cur_dest[1] = base64[in >> 12]; + in &= 0xFFF; + cur_dest[2] = base64[in >> 6]; + in &= 0x3F; + cur_dest[3] = base64[in]; + + cur_dest += 4; + cur_src += 3; + } + } + // To save time, we didn't update szdest or szsrc in the loop. So do it now. 
+ szdest = limit_dest - cur_dest; + szsrc = limit_src - cur_src; + + /* now deal with the tail (<=3 bytes) */ + switch (szsrc) { + case 0: + // Nothing left; nothing more to do. + break; + case 1: { + // One byte left: this encodes to two characters, and (optionally) + // two pad characters to round out the four-character cypherblock. + if (szdest < 2) return 0; + uint32_t in = cur_src[0]; + cur_dest[0] = base64[in >> 2]; + in &= 0x3; + cur_dest[1] = base64[in << 4]; + cur_dest += 2; + szdest -= 2; + if (do_padding) { + if (szdest < 2) return 0; + cur_dest[0] = kPad64; + cur_dest[1] = kPad64; + cur_dest += 2; + szdest -= 2; + } + break; + } + case 2: { + // Two bytes left: this encodes to three characters, and (optionally) + // one pad character to round out the four-character cypherblock. + if (szdest < 3) return 0; + uint32_t in = absl::big_endian::Load16(cur_src); + cur_dest[0] = base64[in >> 10]; + in &= 0x3FF; + cur_dest[1] = base64[in >> 4]; + in &= 0x00F; + cur_dest[2] = base64[in << 2]; + cur_dest += 3; + szdest -= 3; + if (do_padding) { + if (szdest < 1) return 0; + cur_dest[0] = kPad64; + cur_dest += 1; + szdest -= 1; + } + break; + } + case 3: { + // Three bytes left: same as in the big loop above. We can't do this in + // the loop because the loop above always reads 4 bytes, and the fourth + // byte is past the end of the input. + if (szdest < 4) return 0; + uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1); + cur_dest[0] = base64[in >> 18]; + in &= 0x3FFFF; + cur_dest[1] = base64[in >> 12]; + in &= 0xFFF; + cur_dest[2] = base64[in >> 6]; + in &= 0x3F; + cur_dest[3] = base64[in]; + cur_dest += 4; + szdest -= 4; + break; + } + default: + // Should not be reached: blocks of 4 bytes are handled + // in the while loop before this switch statement. + ABSL_RAW_LOG(FATAL, "Logic problem? 
szsrc = %zu", szsrc); + break; + } + return (cur_dest - dest); +} + +constexpr char kBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +constexpr char kWebSafeBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + +void Base64EscapeInternal(const unsigned char* src, size_t szsrc, std::string* dest, + bool do_padding, const char* base64_chars) { + const size_t calc_escaped_size = + CalculateBase64EscapedLenInternal(szsrc, do_padding); + strings_internal::STLStringResizeUninitialized(dest, calc_escaped_size); + + const size_t escaped_len = Base64EscapeInternal( + src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding); + assert(calc_escaped_size == escaped_len); + dest->erase(escaped_len); +} + +bool Base64UnescapeInternal(const char* src, size_t slen, std::string* dest, + const signed char* unbase64) { + // Determine the size of the output std::string. Base64 encodes every 3 bytes into + // 4 characters. any leftover chars are added directly for good measure. + // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548 + const size_t dest_len = 3 * (slen / 4) + (slen % 4); + + strings_internal::STLStringResizeUninitialized(dest, dest_len); + + // We are getting the destination buffer by getting the beginning of the + // std::string and converting it into a char *. 
+ size_t len; + const bool ok = + Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len); + if (!ok) { + dest->clear(); + return false; + } + + // could be shorter if there was padding + assert(len <= dest_len); + dest->erase(len); + + return true; +} + +/* clang-format off */ +constexpr char kHexValue[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, // '0'..'9' + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'A'..'F' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'a'..'f' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; +/* clang-format on */ + +// This is a templated function so that T can be either a char* +// or a std::string. This works because we use the [] operator to access +// individual characters at a time. +template <typename T> +void HexStringToBytesInternal(const char* from, T to, ptrdiff_t num) { + for (int i = 0; i < num; i++) { + to[i] = (kHexValue[from[i * 2] & 0xFF] << 4) + + (kHexValue[from[i * 2 + 1] & 0xFF]); + } +} + +// This is a templated function so that T can be either a char* or a std::string. 
+template <typename T> +void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) { + auto dest_ptr = &dest[0]; + for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) { + const char* hex_p = &kHexTable[*src_ptr * 2]; + std::copy(hex_p, hex_p + 2, dest_ptr); + } +} + +} // namespace + +// ---------------------------------------------------------------------- +// CUnescape() +// +// See CUnescapeInternal() for implementation details. +// ---------------------------------------------------------------------- +bool CUnescape(absl::string_view source, std::string* dest, std::string* error) { + return CUnescapeInternal(source, kUnescapeNulls, dest, error); +} + +std::string CEscape(absl::string_view src) { + std::string dest; + CEscapeAndAppendInternal(src, &dest); + return dest; +} + +std::string CHexEscape(absl::string_view src) { + return CEscapeInternal(src, true, false); +} + +std::string Utf8SafeCEscape(absl::string_view src) { + return CEscapeInternal(src, false, true); +} + +std::string Utf8SafeCHexEscape(absl::string_view src) { + return CEscapeInternal(src, true, true); +} + +// ---------------------------------------------------------------------- +// ptrdiff_t Base64Unescape() - base64 decoder +// ptrdiff_t Base64Escape() - base64 encoder +// ptrdiff_t WebSafeBase64Unescape() - Google's variation of base64 decoder +// ptrdiff_t WebSafeBase64Escape() - Google's variation of base64 encoder +// +// Check out +// http://tools.ietf.org/html/rfc2045 for formal description, but what we +// care about is that... +// Take the encoded stuff in groups of 4 characters and turn each +// character into a code 0 to 63 thus: +// A-Z map to 0 to 25 +// a-z map to 26 to 51 +// 0-9 map to 52 to 61 +// +(- for WebSafe) maps to 62 +// /(_ for WebSafe) maps to 63 +// There will be four numbers, all less than 64 which can be represented +// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). 
//   Arrange the 6 digit binary numbers into three bytes as such:
//   aaaaaabb bbbbcccc ccdddddd
//   Equals signs (one or two) are used at the end of the encoded block to
//   indicate that the text was not an integer multiple of three bytes long.
// ----------------------------------------------------------------------

// Decodes standard base64 text in `src` into `*dest` using the kUnBase64
// table; returns the result of Base64UnescapeInternal().
bool Base64Unescape(absl::string_view src, std::string* dest) {
  return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
}

// Same as Base64Unescape(), but decodes with the kUnWebSafeBase64 table.
bool WebSafeBase64Unescape(absl::string_view src, std::string* dest) {
  return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64);
}

// Encodes `src` into `*dest` with the kBase64Chars alphabet; the `true`
// argument is Base64EscapeInternal()'s `do_padding` flag.
void Base64Escape(absl::string_view src, std::string* dest) {
  Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
                       src.size(), dest, true, kBase64Chars);
}

// Encodes `src` into `*dest` with the kWebSafeBase64Chars alphabet and with
// `do_padding` set to false.
void WebSafeBase64Escape(absl::string_view src, std::string* dest) {
  Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
                       src.size(), dest, false, kWebSafeBase64Chars);
}

// Converts pairs of hex digits in `from` into bytes. The result holds
// from.size() / 2 bytes, so a trailing unpaired digit is ignored.
std::string HexStringToBytes(absl::string_view from) {
  std::string result;
  const auto num = from.size() / 2;
  strings_internal::STLStringResizeUninitialized(&result, num);
  absl::HexStringToBytesInternal<std::string&>(from.data(), result, num);
  return result;
}

// Converts every byte of `from` into two hex characters; the result holds
// 2 * from.size() characters.
std::string BytesToHexString(absl::string_view from) {
  std::string result;
  strings_internal::STLStringResizeUninitialized(&result, 2 * from.size());
  absl::BytesToHexStringInternal<std::string&>(
      reinterpret_cast<const unsigned char*>(from.data()), result, from.size());
  return result;
}

}  // namespace absl
http://git-wip-us.apache.org/repos/asf/marmotta/blob/0eb556da/libraries/ostrich/backend/3rdparty/abseil/absl/strings/escaping.h
----------------------------------------------------------------------
diff --git a/libraries/ostrich/backend/3rdparty/abseil/absl/strings/escaping.h b/libraries/ostrich/backend/3rdparty/abseil/absl/strings/escaping.h
new file mode 100644
index 0000000..1af0afa
---
/dev/null
+++ b/libraries/ostrich/backend/3rdparty/abseil/absl/strings/escaping.h
@@ -0,0 +1,161 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: escaping.h
// -----------------------------------------------------------------------------
//
// This header file contains string utilities involved in escaping and
// unescaping strings in various ways.
//

#ifndef ABSL_STRINGS_ESCAPING_H_
#define ABSL_STRINGS_ESCAPING_H_

#include <cstddef>
#include <string>
#include <vector>

#include "absl/base/macros.h"
#include "absl/strings/ascii.h"
#include "absl/strings/str_join.h"
#include "absl/strings/string_view.h"

namespace absl {

// CUnescape()
//
// Unescapes a `source` string and copies it into `dest`, rewriting C-style
// escape sequences (http://en.cppreference.com/w/cpp/language/escape) into
// their proper code point equivalents, returning `true` if successful.
//
// The following unescape sequences can be handled:
//
//   * ASCII escape sequences ('\n','\r','\\', etc.) to their ASCII equivalents
//   * Octal escape sequences ('\nnn') to byte nnn. The unescaped value must
//     resolve to a single byte or an error will occur. E.g. values greater than
//     0xff will produce an error.
//   * Hexadecimal escape sequences ('\xnn') to byte nn. While an arbitrary
//     number of following digits are allowed, the unescaped value must resolve
//     to a single byte or an error will occur. E.g. '\x0045' is equivalent to
//     '\x45', but '\x1234' will produce an error.
//   * Unicode escape sequences ('\unnnn' for exactly four hex digits or
//     '\Unnnnnnnn' for exactly eight hex digits), which will be encoded in
//     UTF-8. (E.g., `\u2019` unescapes to the three bytes 0xE2, 0x80, and
//     0x99.)
//
//
// If any errors are encountered, this function returns `false` and stores the
// first encountered error in `error`. To disable error reporting, set `error`
// to `nullptr` or use the overload with no error reporting below.
//
// Example:
//
//   std::string s = "foo\\rbar\\nbaz\\t";
//   std::string unescaped_s;
//   if (!absl::CUnescape(s, &unescaped_s)) {
//     ...
//   }
//   EXPECT_EQ(unescaped_s, "foo\rbar\nbaz\t");
bool CUnescape(absl::string_view source, std::string* dest, std::string* error);

// Overload of `CUnescape()` with no error reporting: forwards to the
// three-argument overload with a null `error`.
inline bool CUnescape(absl::string_view source, std::string* dest) {
  return CUnescape(source, dest, nullptr);
}

// CEscape()
//
// Escapes a `src` string using C-style escape sequences
// (http://en.cppreference.com/w/cpp/language/escape), escaping other
// non-printable/non-whitespace bytes as octal sequences (e.g. "\377").
//
// Example:
//
//   std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n";
//   std::string escaped_s = absl::CEscape(s);
//   EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\010\\t\\n\\013\\014\\r\\n");
std::string CEscape(absl::string_view src);

// CHexEscape()
//
// Escapes a `src` string using C-style escape sequences, escaping
// other non-printable/non-whitespace bytes as hexadecimal sequences (e.g.
// "\xFF").
//
// Example:
//
//   std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n";
//   std::string escaped_s = absl::CHexEscape(s);
//   EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\x08\\t\\n\\x0b\\x0c\\r\\n");
std::string CHexEscape(absl::string_view src);

// Utf8SafeCEscape()
//
// Escapes a `src` string using C-style escape sequences, escaping bytes as
// octal sequences, and passing through UTF-8 characters without conversion.
// I.e., when encountering any bytes with their high bit set, this function
// will not escape those values, whether or not they are valid UTF-8.
std::string Utf8SafeCEscape(absl::string_view src);

// Utf8SafeCHexEscape()
//
// Escapes a `src` string using C-style escape sequences, escaping bytes as
// hexadecimal sequences, and passing through UTF-8 characters without
// conversion.
std::string Utf8SafeCHexEscape(absl::string_view src);

// Base64Unescape()
//
// Converts a `src` string encoded in Base64 to its binary equivalent, writing
// it to a `dest` buffer, returning `true` on success. If `src` contains invalid
// characters, `dest` is cleared and the function returns `false`.
bool Base64Unescape(absl::string_view src, std::string* dest);

// WebSafeBase64Unescape(absl::string_view, std::string*)
//
// Converts a `src` string encoded in Base64 to its binary equivalent, writing
// it to a `dest` buffer, but using '-' instead of '+', and '_' instead of '/'.
// If `src` contains invalid characters, `dest` is cleared and the function
// returns `false`.
bool WebSafeBase64Unescape(absl::string_view src, std::string* dest);

// Base64Escape()
//
// Encodes a `src` string into a `dest` buffer using base64 encoding, with
// padding characters. This function conforms with RFC 4648 section 4 (base64).
void Base64Escape(absl::string_view src, std::string* dest);

// WebSafeBase64Escape()
//
// Encodes a `src` string into a `dest` buffer using '-' instead of '+' and
// '_' instead of '/', and without padding. This function conforms with RFC 4648
// section 5 (base64url).
void WebSafeBase64Escape(absl::string_view src, std::string* dest);

// HexStringToBytes()
//
// Converts an ASCII hex string into bytes, returning binary data of length
// `from.size()/2`.
std::string HexStringToBytes(absl::string_view from);

// BytesToHexString()
//
// Converts binary data into an ASCII text string, returning a string of size
// `2*from.size()`.
std::string BytesToHexString(absl::string_view from);

}  // namespace absl

#endif  // ABSL_STRINGS_ESCAPING_H_
http://git-wip-us.apache.org/repos/asf/marmotta/blob/0eb556da/libraries/ostrich/backend/3rdparty/abseil/absl/strings/escaping_test.cc
----------------------------------------------------------------------
diff --git a/libraries/ostrich/backend/3rdparty/abseil/absl/strings/escaping_test.cc b/libraries/ostrich/backend/3rdparty/abseil/absl/strings/escaping_test.cc
new file mode 100644
index 0000000..982989b
--- /dev/null
+++ b/libraries/ostrich/backend/3rdparty/abseil/absl/strings/escaping_test.cc
@@ -0,0 +1,641 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/strings/escaping.h"

#include <array>
#include <cstdio>
#include <cstring>
#include <memory>
#include <vector>

#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/container/fixed_array.h"
#include "absl/strings/str_cat.h"

#include "absl/strings/internal/escaping_test_common.inc"

namespace {

// One escaping fixture: `escaped` is the expected escaped text for
// `unescaped`.
struct epair {
  std::string escaped;
  std::string unescaped;
};

// Round-trips a set of inputs through each escaping function and CUnescape().
TEST(CEscape, EscapeAndUnescape) {
  const std::string inputs[] = {
    std::string("foo\nxx\r\b\0023"),
    std::string(""),
    std::string("abc"),
    std::string("\1chad_rules"),
    std::string("\1arnar_drools"),
    std::string("xxxx\r\t'\"\\"),
    std::string("\0xx\0", 4),
    std::string("\x01\x31"),
    std::string("abc\xb\x42\141bc"),
    std::string("123\1\x31\x32\x33"),
    std::string("\xc1\xca\x1b\x62\x19o\xcc\x04"),
    std::string("\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name"),
  };
  // Run every input through all four escaping functions: CEscape, CHexEscape,
  // Utf8SafeCEscape and Utf8SafeCHexEscape.
  for (int kind = 0; kind < 4; kind++) {
    for (const std::string& original : inputs) {
      std::string escaped;
      switch (kind) {
        case 0:
          escaped = absl::CEscape(original);
          break;
        case 1:
          escaped = absl::CHexEscape(original);
          break;
        case 2:
          escaped = absl::Utf8SafeCEscape(original);
          break;
        case 3:
          escaped = absl::Utf8SafeCHexEscape(original);
          break;
      }
      std::string unescaped_str;
      EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str));
      EXPECT_EQ(unescaped_str, original);

      // Check in-place unescaping
      std::string s = escaped;
      EXPECT_TRUE(absl::CUnescape(s, &s));
      ASSERT_EQ(s, original);
    }
  }
  // Check that all possible two character strings can be escaped then
  // unescaped successfully.
  for (int char0 = 0; char0 < 256; char0++) {
    for (int char1 = 0; char1 < 256; char1++) {
      char chars[2];
      chars[0] = char0;
      chars[1] = char1;
      std::string s(chars, 2);
      std::string escaped = absl::CHexEscape(s);
      std::string unescaped;
      EXPECT_TRUE(absl::CUnescape(escaped, &unescaped));
      EXPECT_EQ(s, unescaped);
    }
  }
}

// Checks each escaping function against a table of exact expected outputs.
TEST(CEscape, BasicEscaping) {
  epair oct_values[] = {
      {"foo\\rbar\\nbaz\\t", "foo\rbar\nbaz\t"},
      {"\\'full of \\\"sound\\\" and \\\"fury\\\"\\'",
       "'full of \"sound\" and \"fury\"'"},
      {"signi\\\\fying\\\\ nothing\\\\", "signi\\fying\\ nothing\\"},
      {"\\010\\t\\n\\013\\014\\r", "\010\011\012\013\014\015"}
  };
  epair hex_values[] = {
      {"ubik\\rubik\\nubik\\t", "ubik\rubik\nubik\t"},
      {"I\\\'ve just seen a \\\"face\\\"",
       "I've just seen a \"face\""},
      {"hel\\\\ter\\\\skel\\\\ter\\\\", "hel\\ter\\skel\\ter\\"},
      {"\\x08\\t\\n\\x0b\\x0c\\r", "\010\011\012\013\014\015"}
  };
  epair utf8_oct_values[] = {
      {"\xe8\xb0\xb7\xe6\xad\x8c\\r\xe8\xb0\xb7\xe6\xad\x8c\\nbaz\\t",
       "\xe8\xb0\xb7\xe6\xad\x8c\r\xe8\xb0\xb7\xe6\xad\x8c\nbaz\t"},
      {"\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name",
       "\"\xe8\xb0\xb7\xe6\xad\x8c\" is Google\'s Chinese name"},
      {"\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\\\are\\\\Japanese\\\\chars\\\\",
       "\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\are\\Japanese\\chars\\"},
      {"\xed\x81\xac\xeb\xa1\xac\\010\\t\\n\\013\\014\\r",
       "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"}
  };
  epair utf8_hex_values[] = {
      {"\x20\xe4\xbd\xa0\\t\xe5\xa5\xbd,\\r!\\n",
       "\x20\xe4\xbd\xa0\t\xe5\xa5\xbd,\r!\n"},
      {"\xe8\xa9\xa6\xe9\xa8\x93\\\' means \\\"test\\\"",
       "\xe8\xa9\xa6\xe9\xa8\x93\' means \"test\""},
      {"\\\\\xe6\x88\x91\\\\:\\\\\xe6\x9d\xa8\xe6\xac\xa2\\\\",
       "\\\xe6\x88\x91\\:\\\xe6\x9d\xa8\xe6\xac\xa2\\"},
      {"\xed\x81\xac\xeb\xa1\xac\\x08\\t\\n\\x0b\\x0c\\r",
       "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"}
  };

  for (const epair& val : oct_values) {
    std::string escaped = absl::CEscape(val.unescaped);
    EXPECT_EQ(escaped, val.escaped);
  }
  for (const epair& val : hex_values) {
    std::string escaped = absl::CHexEscape(val.unescaped);
    EXPECT_EQ(escaped, val.escaped);
  }
  for (const epair& val : utf8_oct_values) {
    std::string escaped = absl::Utf8SafeCEscape(val.unescaped);
    EXPECT_EQ(escaped, val.escaped);
  }
  for (const epair& val : utf8_hex_values) {
    std::string escaped = absl::Utf8SafeCHexEscape(val.unescaped);
    EXPECT_EQ(escaped, val.escaped);
  }
}

// Checks CUnescape() on Unicode escapes and on inputs that must be rejected
// with a non-empty error message.
TEST(Unescape, BasicFunction) {
  epair tests[] =
    {{"\\u0030", "0"},
     {"\\u00A3", "\xC2\xA3"},
     {"\\u22FD", "\xE2\x8B\xBD"},
     {"\\U00010000", "\xF0\x90\x80\x80"},
     {"\\U0010FFFD", "\xF4\x8F\xBF\xBD"}};
  for (const epair& val : tests) {
    std::string out;
    EXPECT_TRUE(absl::CUnescape(val.escaped, &out));
    EXPECT_EQ(out, val.unescaped);
  }
  std::string bad[] =
     {"\\u1",  // too short
      "\\U1",  // too short
      "\\Uffffff",  // exceeds 0x10ffff (largest Unicode)
      "\\U00110000",  // exceeds 0x10ffff (largest Unicode)
      "\\uD835",  // surrogate character (D800-DFFF)
      "\\U0000DD04",  // surrogate character (D800-DFFF)
      "\\777",  // exceeds 0xff
      "\\xABCD"};  // exceeds 0xff
  for (const std::string& e : bad) {
    std::string error;
    std::string out;
    EXPECT_FALSE(absl::CUnescape(e, &out, &error));
    EXPECT_FALSE(error.empty());
  }
}

// Fixture holding escaped strings that contain several null escapes, plus a
// per-test output string.
class CUnescapeTest : public testing::Test {
 protected:
  static const char kStringWithMultipleOctalNulls[];
  static const char kStringWithMultipleHexNulls[];
  static const char kStringWithMultipleUnicodeNulls[];

  std::string result_string_;
};

const char CUnescapeTest::kStringWithMultipleOctalNulls[] =
    "\\0\\n"    // null escape \0 plus newline
    "0\\n"      // just a number 0 (not a null escape) plus newline
    "\\00\\12"  // null escape \00 plus octal newline code
    "\\000";    // null escape \000

// This has the same ingredients as kStringWithMultipleOctalNulls
// but with \x hex escapes instead of octal escapes.
+const char CUnescapeTest::kStringWithMultipleHexNulls[] = + "\\x0\\n" + "0\\n" + "\\x00\\xa" + "\\x000"; + +const char CUnescapeTest::kStringWithMultipleUnicodeNulls[] = + "\\u0000\\n" // short-form (4-digit) null escape plus newline + "0\\n" // just a number 0 (not a null escape) plus newline + "\\U00000000"; // long-form (8-digit) null escape + +TEST_F(CUnescapeTest, Unescapes1CharOctalNull) { + std::string original_string = "\\0"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes2CharOctalNull) { + std::string original_string = "\\00"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes3CharOctalNull) { + std::string original_string = "\\000"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes1CharHexNull) { + std::string original_string = "\\x0"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes2CharHexNull) { + std::string original_string = "\\x00"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes3CharHexNull) { + std::string original_string = "\\x000"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes4CharUnicodeNull) { + std::string original_string = "\\u0000"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes8CharUnicodeNull) { + std::string original_string = "\\U00000000"; + EXPECT_TRUE(absl::CUnescape(original_string, 
&result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, UnescapesMultipleOctalNulls) { + std::string original_string(kStringWithMultipleOctalNulls); + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + // All escapes, including newlines and null escapes, should have been + // converted to the equivalent characters. + EXPECT_EQ(std::string("\0\n" + "0\n" + "\0\n" + "\0", 7), result_string_); +} + + +TEST_F(CUnescapeTest, UnescapesMultipleHexNulls) { + std::string original_string(kStringWithMultipleHexNulls); + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0\n" + "0\n" + "\0\n" + "\0", 7), result_string_); +} + +TEST_F(CUnescapeTest, UnescapesMultipleUnicodeNulls) { + std::string original_string(kStringWithMultipleUnicodeNulls); + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0\n" + "0\n" + "\0", 5), result_string_); +} + +static struct { + absl::string_view plaintext; + absl::string_view cyphertext; +} const base64_tests[] = { + // Empty std::string. + {{"", 0}, {"", 0}}, + {{nullptr, 0}, + {"", 0}}, // if length is zero, plaintext ptr must be ignored! + + // Basic bit patterns; + // values obtained with "echo -n '...' 
| uuencode -m test" + + {{"\000", 1}, "AA=="}, + {{"\001", 1}, "AQ=="}, + {{"\002", 1}, "Ag=="}, + {{"\004", 1}, "BA=="}, + {{"\010", 1}, "CA=="}, + {{"\020", 1}, "EA=="}, + {{"\040", 1}, "IA=="}, + {{"\100", 1}, "QA=="}, + {{"\200", 1}, "gA=="}, + + {{"\377", 1}, "/w=="}, + {{"\376", 1}, "/g=="}, + {{"\375", 1}, "/Q=="}, + {{"\373", 1}, "+w=="}, + {{"\367", 1}, "9w=="}, + {{"\357", 1}, "7w=="}, + {{"\337", 1}, "3w=="}, + {{"\277", 1}, "vw=="}, + {{"\177", 1}, "fw=="}, + {{"\000\000", 2}, "AAA="}, + {{"\000\001", 2}, "AAE="}, + {{"\000\002", 2}, "AAI="}, + {{"\000\004", 2}, "AAQ="}, + {{"\000\010", 2}, "AAg="}, + {{"\000\020", 2}, "ABA="}, + {{"\000\040", 2}, "ACA="}, + {{"\000\100", 2}, "AEA="}, + {{"\000\200", 2}, "AIA="}, + {{"\001\000", 2}, "AQA="}, + {{"\002\000", 2}, "AgA="}, + {{"\004\000", 2}, "BAA="}, + {{"\010\000", 2}, "CAA="}, + {{"\020\000", 2}, "EAA="}, + {{"\040\000", 2}, "IAA="}, + {{"\100\000", 2}, "QAA="}, + {{"\200\000", 2}, "gAA="}, + + {{"\377\377", 2}, "//8="}, + {{"\377\376", 2}, "//4="}, + {{"\377\375", 2}, "//0="}, + {{"\377\373", 2}, "//s="}, + {{"\377\367", 2}, "//c="}, + {{"\377\357", 2}, "/+8="}, + {{"\377\337", 2}, "/98="}, + {{"\377\277", 2}, "/78="}, + {{"\377\177", 2}, "/38="}, + {{"\376\377", 2}, "/v8="}, + {{"\375\377", 2}, "/f8="}, + {{"\373\377", 2}, "+/8="}, + {{"\367\377", 2}, "9/8="}, + {{"\357\377", 2}, "7/8="}, + {{"\337\377", 2}, "3/8="}, + {{"\277\377", 2}, "v/8="}, + {{"\177\377", 2}, "f/8="}, + + {{"\000\000\000", 3}, "AAAA"}, + {{"\000\000\001", 3}, "AAAB"}, + {{"\000\000\002", 3}, "AAAC"}, + {{"\000\000\004", 3}, "AAAE"}, + {{"\000\000\010", 3}, "AAAI"}, + {{"\000\000\020", 3}, "AAAQ"}, + {{"\000\000\040", 3}, "AAAg"}, + {{"\000\000\100", 3}, "AABA"}, + {{"\000\000\200", 3}, "AACA"}, + {{"\000\001\000", 3}, "AAEA"}, + {{"\000\002\000", 3}, "AAIA"}, + {{"\000\004\000", 3}, "AAQA"}, + {{"\000\010\000", 3}, "AAgA"}, + {{"\000\020\000", 3}, "ABAA"}, + {{"\000\040\000", 3}, "ACAA"}, + {{"\000\100\000", 3}, "AEAA"}, + 
{{"\000\200\000", 3}, "AIAA"}, + {{"\001\000\000", 3}, "AQAA"}, + {{"\002\000\000", 3}, "AgAA"}, + {{"\004\000\000", 3}, "BAAA"}, + {{"\010\000\000", 3}, "CAAA"}, + {{"\020\000\000", 3}, "EAAA"}, + {{"\040\000\000", 3}, "IAAA"}, + {{"\100\000\000", 3}, "QAAA"}, + {{"\200\000\000", 3}, "gAAA"}, + + {{"\377\377\377", 3}, "////"}, + {{"\377\377\376", 3}, "///+"}, + {{"\377\377\375", 3}, "///9"}, + {{"\377\377\373", 3}, "///7"}, + {{"\377\377\367", 3}, "///3"}, + {{"\377\377\357", 3}, "///v"}, + {{"\377\377\337", 3}, "///f"}, + {{"\377\377\277", 3}, "//+/"}, + {{"\377\377\177", 3}, "//9/"}, + {{"\377\376\377", 3}, "//7/"}, + {{"\377\375\377", 3}, "//3/"}, + {{"\377\373\377", 3}, "//v/"}, + {{"\377\367\377", 3}, "//f/"}, + {{"\377\357\377", 3}, "/+//"}, + {{"\377\337\377", 3}, "/9//"}, + {{"\377\277\377", 3}, "/7//"}, + {{"\377\177\377", 3}, "/3//"}, + {{"\376\377\377", 3}, "/v//"}, + {{"\375\377\377", 3}, "/f//"}, + {{"\373\377\377", 3}, "+///"}, + {{"\367\377\377", 3}, "9///"}, + {{"\357\377\377", 3}, "7///"}, + {{"\337\377\377", 3}, "3///"}, + {{"\277\377\377", 3}, "v///"}, + {{"\177\377\377", 3}, "f///"}, + + // Random numbers: values obtained with + // + // #! 
/bin/bash + // dd bs=$1 count=1 if=/dev/random of=/tmp/bar.random + // od -N $1 -t o1 /tmp/bar.random + // uuencode -m test < /tmp/bar.random + // + // where $1 is the number of bytes (2, 3) + + {{"\243\361", 2}, "o/E="}, + {{"\024\167", 2}, "FHc="}, + {{"\313\252", 2}, "y6o="}, + {{"\046\041", 2}, "JiE="}, + {{"\145\236", 2}, "ZZ4="}, + {{"\254\325", 2}, "rNU="}, + {{"\061\330", 2}, "Mdg="}, + {{"\245\032", 2}, "pRo="}, + {{"\006\000", 2}, "BgA="}, + {{"\375\131", 2}, "/Vk="}, + {{"\303\210", 2}, "w4g="}, + {{"\040\037", 2}, "IB8="}, + {{"\261\372", 2}, "sfo="}, + {{"\335\014", 2}, "3Qw="}, + {{"\233\217", 2}, "m48="}, + {{"\373\056", 2}, "+y4="}, + {{"\247\232", 2}, "p5o="}, + {{"\107\053", 2}, "Rys="}, + {{"\204\077", 2}, "hD8="}, + {{"\276\211", 2}, "vok="}, + {{"\313\110", 2}, "y0g="}, + {{"\363\376", 2}, "8/4="}, + {{"\251\234", 2}, "qZw="}, + {{"\103\262", 2}, "Q7I="}, + {{"\142\312", 2}, "Yso="}, + {{"\067\211", 2}, "N4k="}, + {{"\220\001", 2}, "kAE="}, + {{"\152\240", 2}, "aqA="}, + {{"\367\061", 2}, "9zE="}, + {{"\133\255", 2}, "W60="}, + {{"\176\035", 2}, "fh0="}, + {{"\032\231", 2}, "Gpk="}, + + {{"\013\007\144", 3}, "Cwdk"}, + {{"\030\112\106", 3}, "GEpG"}, + {{"\047\325\046", 3}, "J9Um"}, + {{"\310\160\022", 3}, "yHAS"}, + {{"\131\100\237", 3}, "WUCf"}, + {{"\064\342\134", 3}, "NOJc"}, + {{"\010\177\004", 3}, "CH8E"}, + {{"\345\147\205", 3}, "5WeF"}, + {{"\300\343\360", 3}, "wOPw"}, + {{"\061\240\201", 3}, "MaCB"}, + {{"\225\333\044", 3}, "ldsk"}, + {{"\215\137\352", 3}, "jV/q"}, + {{"\371\147\160", 3}, "+Wdw"}, + {{"\030\320\051", 3}, "GNAp"}, + {{"\044\174\241", 3}, "JHyh"}, + {{"\260\127\037", 3}, "sFcf"}, + {{"\111\045\033", 3}, "SSUb"}, + {{"\202\114\107", 3}, "gkxH"}, + {{"\057\371\042", 3}, "L/ki"}, + {{"\223\247\244", 3}, "k6ek"}, + {{"\047\216\144", 3}, "J45k"}, + {{"\203\070\327", 3}, "gzjX"}, + {{"\247\140\072", 3}, "p2A6"}, + {{"\124\115\116", 3}, "VE1O"}, + {{"\157\162\050", 3}, "b3Io"}, + {{"\357\223\004", 3}, "75ME"}, + 
{{"\052\117\156", 3}, "Kk9u"}, + {{"\347\154\000", 3}, "52wA"}, + {{"\303\012\142", 3}, "wwpi"}, + {{"\060\035\362", 3}, "MB3y"}, + {{"\130\226\361", 3}, "WJbx"}, + {{"\173\013\071", 3}, "ews5"}, + {{"\336\004\027", 3}, "3gQX"}, + {{"\357\366\234", 3}, "7/ac"}, + {{"\353\304\111", 3}, "68RJ"}, + {{"\024\264\131", 3}, "FLRZ"}, + {{"\075\114\251", 3}, "PUyp"}, + {{"\315\031\225", 3}, "zRmV"}, + {{"\154\201\276", 3}, "bIG+"}, + {{"\200\066\072", 3}, "gDY6"}, + {{"\142\350\267", 3}, "Yui3"}, + {{"\033\000\166", 3}, "GwB2"}, + {{"\210\055\077", 3}, "iC0/"}, + {{"\341\037\124", 3}, "4R9U"}, + {{"\161\103\152", 3}, "cUNq"}, + {{"\270\142\131", 3}, "uGJZ"}, + {{"\337\076\074", 3}, "3z48"}, + {{"\375\106\362", 3}, "/Uby"}, + {{"\227\301\127", 3}, "l8FX"}, + {{"\340\002\234", 3}, "4AKc"}, + {{"\121\064\033", 3}, "UTQb"}, + {{"\157\134\143", 3}, "b1xj"}, + {{"\247\055\327", 3}, "py3X"}, + {{"\340\142\005", 3}, "4GIF"}, + {{"\060\260\143", 3}, "MLBj"}, + {{"\075\203\170", 3}, "PYN4"}, + {{"\143\160\016", 3}, "Y3AO"}, + {{"\313\013\063", 3}, "ywsz"}, + {{"\174\236\135", 3}, "fJ5d"}, + {{"\103\047\026", 3}, "QycW"}, + {{"\365\005\343", 3}, "9QXj"}, + {{"\271\160\223", 3}, "uXCT"}, + {{"\362\255\172", 3}, "8q16"}, + {{"\113\012\015", 3}, "SwoN"}, + + // various lengths, generated by this python script: + // + // from std::string import lowercase as lc + // for i in range(27): + // print '{ %2d, "%s",%s "%s" },' % (i, lc[:i], ' ' * (26-i), + // lc[:i].encode('base64').strip()) + + {{"", 0}, {"", 0}}, + {"a", "YQ=="}, + {"ab", "YWI="}, + {"abc", "YWJj"}, + {"abcd", "YWJjZA=="}, + {"abcde", "YWJjZGU="}, + {"abcdef", "YWJjZGVm"}, + {"abcdefg", "YWJjZGVmZw=="}, + {"abcdefgh", "YWJjZGVmZ2g="}, + {"abcdefghi", "YWJjZGVmZ2hp"}, + {"abcdefghij", "YWJjZGVmZ2hpag=="}, + {"abcdefghijk", "YWJjZGVmZ2hpams="}, + {"abcdefghijkl", "YWJjZGVmZ2hpamts"}, + {"abcdefghijklm", "YWJjZGVmZ2hpamtsbQ=="}, + {"abcdefghijklmn", "YWJjZGVmZ2hpamtsbW4="}, + {"abcdefghijklmno", "YWJjZGVmZ2hpamtsbW5v"}, + 
{"abcdefghijklmnop", "YWJjZGVmZ2hpamtsbW5vcA=="}, + {"abcdefghijklmnopq", "YWJjZGVmZ2hpamtsbW5vcHE="}, + {"abcdefghijklmnopqr", "YWJjZGVmZ2hpamtsbW5vcHFy"}, + {"abcdefghijklmnopqrs", "YWJjZGVmZ2hpamtsbW5vcHFycw=="}, + {"abcdefghijklmnopqrst", "YWJjZGVmZ2hpamtsbW5vcHFyc3Q="}, + {"abcdefghijklmnopqrstu", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1"}, + {"abcdefghijklmnopqrstuv", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dg=="}, + {"abcdefghijklmnopqrstuvw", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnc="}, + {"abcdefghijklmnopqrstuvwx", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4"}, + {"abcdefghijklmnopqrstuvwxy", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eQ=="}, + {"abcdefghijklmnopqrstuvwxyz", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="}, +}; + +TEST(Base64, EscapeAndUnescape) { + // Check the short strings; this tests the math (and boundaries) + for (const auto& tc : base64_tests) { + std::string encoded("this junk should be ignored"); + absl::Base64Escape(tc.plaintext, &encoded); + EXPECT_EQ(encoded, tc.cyphertext); + + std::string decoded("this junk should be ignored"); + EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded)); + EXPECT_EQ(decoded, tc.plaintext); + + std::string websafe(tc.cyphertext); + for (int c = 0; c < websafe.size(); ++c) { + if ('+' == websafe[c]) websafe[c] = '-'; + if ('/' == websafe[c]) websafe[c] = '_'; + if ('=' == websafe[c]) { + websafe.resize(c); + break; + } + } + + encoded = "this junk should be ignored"; + absl::WebSafeBase64Escape(tc.plaintext, &encoded); + EXPECT_EQ(encoded, websafe); + + // Let's try the std::string version of the decoder + decoded = "this junk should be ignored"; + EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded)); + EXPECT_EQ(decoded, tc.plaintext); + } + + // Now try the long strings, this tests the streaming + for (const auto& tc : base64_strings) { + std::string buffer; + absl::WebSafeBase64Escape(tc.plaintext, &buffer); + EXPECT_EQ(tc.cyphertext, buffer); + } + + // Verify the behavior when decoding bad data + { + absl::string_view data_set[] = {"ab-/", 
absl::string_view("\0bcd", 4), + absl::string_view("abc.\0", 5)}; + for (absl::string_view bad_data : data_set) { + std::string buf; + EXPECT_FALSE(absl::Base64Unescape(bad_data, &buf)); + EXPECT_FALSE(absl::WebSafeBase64Unescape(bad_data, &buf)); + EXPECT_TRUE(buf.empty()); + } + } +} + +TEST(Base64, DISABLED_HugeData) { + const size_t kSize = size_t(3) * 1000 * 1000 * 1000; + static_assert(kSize % 3 == 0, "kSize must be divisible by 3"); + const std::string huge(kSize, 'x'); + + std::string escaped; + absl::Base64Escape(huge, &escaped); + + // Generates the std::string that should match a base64 encoded "xxx..." std::string. + // "xxx" in base64 is "eHh4". + std::string expected_encoding; + expected_encoding.reserve(kSize / 3 * 4); + for (size_t i = 0; i < kSize / 3; ++i) { + expected_encoding.append("eHh4"); + } + EXPECT_EQ(expected_encoding, escaped); + + std::string unescaped; + EXPECT_TRUE(absl::Base64Unescape(escaped, &unescaped)); + EXPECT_EQ(huge, unescaped); +} + +TEST(HexAndBack, HexStringToBytes_and_BytesToHexString) { + std::string hex_mixed = "0123456789abcdefABCDEF"; + std::string bytes_expected = "\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF"; + std::string hex_only_lower = "0123456789abcdefabcdef"; + + std::string bytes_result = absl::HexStringToBytes(hex_mixed); + EXPECT_EQ(bytes_expected, bytes_result); + + std::string prefix_valid = hex_mixed + "?"; + std::string prefix_valid_result = absl::HexStringToBytes( + absl::string_view(prefix_valid.data(), prefix_valid.size() - 1)); + EXPECT_EQ(bytes_expected, prefix_valid_result); + + std::string infix_valid = "?" + hex_mixed + "???"; + std::string infix_valid_result = absl::HexStringToBytes( + absl::string_view(infix_valid.data() + 1, hex_mixed.size())); + EXPECT_EQ(bytes_expected, infix_valid_result); + + std::string hex_result = absl::BytesToHexString(bytes_expected); + EXPECT_EQ(hex_only_lower, hex_result); +} + +} // namespace
