(trafficserver) 07/34: tscore: fix out-of-bounds read in ats_base64_decode (#13210)

cmcfarlen Mon, 15 Jun 2026 19:17:40 -0700

This is an automated email from the ASF dual-hosted git repository.

cmcfarlen pushed a commit to branch 10.2.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


commit ec344e4ecf37591f1f64e282f44ff244fca23555
Author: Phong Nguyen <[email protected]>
AuthorDate: Mon Jun 1 11:52:29 2026 -0700

    tscore: fix out-of-bounds read in ats_base64_decode (#13210)
    
    When the base64 alphabet prefix length was not a multiple of four,
    the decode loop ran one iteration past the prefix, reading input
    bytes out of bounds and then reading inBuffer[-2] in the trailing
    adjustment. Found with AddressSanitizer.
    
    Decode only complete 4-character groups and handle a 2- or
    3-character tail explicitly, dropping a lone trailing character.
    Decoded length and bytes are unchanged for every well-defined
    input; only the out-of-bounds reads are removed.
    
    Add unit_tests/test_ink_base64.cc covering both alphabets,
    round-trips, in-place decode, undersized buffers, and truncation.
    Inputs sit in exact-size heap buffers so ASan faults on the old
    decoder and passes after the fix.
    
    (cherry picked from commit 1e1dd1aabeef958d3d876296d0411b8250cfbfcb)
---
 src/tscore/CMakeLists.txt                |   1 +
 src/tscore/ink_base64.cc                 |  51 +++---
 src/tscore/unit_tests/test_ink_base64.cc | 256 +++++++++++++++++++++++++++++++
 3 files changed, 284 insertions(+), 24 deletions(-)

diff --git a/src/tscore/CMakeLists.txt b/src/tscore/CMakeLists.txt
index 7790adc87d..4c4c607318 100644
--- a/src/tscore/CMakeLists.txt
+++ b/src/tscore/CMakeLists.txt
@@ -158,6 +158,7 @@ if(BUILD_TESTING)
     unit_tests/test_Throttler.cc
     unit_tests/test_Tokenizer.cc
     unit_tests/test_arena.cc
+    unit_tests/test_ink_base64.cc
     unit_tests/test_ink_inet.cc
     unit_tests/test_ink_memory.cc
     unit_tests/test_ink_string.cc
diff --git a/src/tscore/ink_base64.cc b/src/tscore/ink_base64.cc
index 849d7c8ce8..208626f526 100644
--- a/src/tscore/ink_base64.cc
+++ b/src/tscore/ink_base64.cc
@@ -33,8 +33,6 @@
 #include "tscore/ink_base64.h"
 #include "tscore/ink_assert.h"
 
-// TODO: The code here seems a bit klunky, and could probably be improved a 
bit.
-
 bool
 ats_base64_encode(const unsigned char *inBuffer, size_t inBufferSize, char 
*outBuffer, size_t outBufSize, size_t *length)
 {
@@ -122,42 +120,47 @@ const unsigned char printableToSixBit[256] = {
 bool
 ats_base64_decode(const char *inBuffer, size_t inBufferSize, unsigned char 
*outBuffer, size_t outBufSize, size_t *length)
 {
-  size_t         inBytes           = 0;
-  size_t         decodedBytes      = 0;
-  unsigned char *buf               = outBuffer;
-  int            inputBytesDecoded = 0;
+  size_t         decodedBytes = 0;
+  unsigned char *buf          = outBuffer;
 
   // Make sure there is sufficient space in the output buffer
   if (outBufSize < ats_base64_decode_dstlen(inBufferSize)) {
     return false;
   }
 
-  // Ignore any trailing ='s or other undecodable characters.
+  // Ignore any trailing ='s or other undecodable characters: consume only the
+  // leading run of base64-alphabet bytes.
   // TODO: Perhaps that ought to be an error instead?
-  while (inBytes < inBufferSize && 
printableToSixBit[static_cast<uint8_t>(inBuffer[inBytes])] <= MAX_PRINT_VAL) {
+  size_t inBytes = 0;
+  while (inBytes < inBufferSize && DECODE(inBuffer[inBytes]) <= MAX_PRINT_VAL) 
{
     ++inBytes;
   }
 
-  for (size_t i = 0; i < inBytes; i += 4) {
-    buf[0] = static_cast<unsigned char>(DECODE(inBuffer[0]) << 2 | 
DECODE(inBuffer[1]) >> 4);
-    buf[1] = static_cast<unsigned char>(DECODE(inBuffer[1]) << 4 | 
DECODE(inBuffer[2]) >> 2);
-    buf[2] = static_cast<unsigned char>(DECODE(inBuffer[2]) << 6 | 
DECODE(inBuffer[3]));
-
-    buf               += 3;
-    inBuffer          += 4;
-    decodedBytes      += 3;
-    inputBytesDecoded += 4;
+  // Decode complete 4-character groups into 3 bytes each. Process only whole
+  // groups here so the loop never reads past the alphabet prefix; the previous
+  // code ran one extra iteration when inBytes was not a multiple of four (a
+  // read out of bounds of the input) and then read inBuffer[-2].
+  while (inBytes >= 4) {
+    buf[0]        = static_cast<unsigned char>(DECODE(inBuffer[0]) << 2 | 
DECODE(inBuffer[1]) >> 4);
+    buf[1]        = static_cast<unsigned char>(DECODE(inBuffer[1]) << 4 | 
DECODE(inBuffer[2]) >> 2);
+    buf[2]        = static_cast<unsigned char>(DECODE(inBuffer[2]) << 6 | 
DECODE(inBuffer[3]));
+    buf          += 3;
+    inBuffer     += 4;
+    decodedBytes += 3;
+    inBytes      -= 4;
   }
 
-  // Check to see if we decoded a multiple of 4 four
-  //    bytes
-  if ((inBytes - inputBytesDecoded) & 0x3) {
-    if (DECODE(inBuffer[-2]) > MAX_PRINT_VAL) {
-      decodedBytes -= 2;
-    } else {
-      decodedBytes -= 1;
+  // Decode a trailing 2- or 3-character group; a lone trailing character does
+  // not encode a full byte and is dropped (as an RFC 4648 decoder requires).
+  if (inBytes >= 2) {
+    buf[0]        = static_cast<unsigned char>(DECODE(inBuffer[0]) << 2 | 
DECODE(inBuffer[1]) >> 4);
+    decodedBytes += 1;
+    if (inBytes >= 3) {
+      buf[1]        = static_cast<unsigned char>(DECODE(inBuffer[1]) << 4 | 
DECODE(inBuffer[2]) >> 2);
+      decodedBytes += 1;
     }
   }
+
   outBuffer[decodedBytes] = '\0';
 
   if (length) {
diff --git a/src/tscore/unit_tests/test_ink_base64.cc 
b/src/tscore/unit_tests/test_ink_base64.cc
new file mode 100644
index 0000000000..bc7ac39816
--- /dev/null
+++ b/src/tscore/unit_tests/test_ink_base64.cc
@@ -0,0 +1,256 @@
+/** @file
+
+  Unit tests for ats_base64_encode / ats_base64_decode.
+
+  Includes a regression test for the out-of-bounds read that occurred when the
+  decodable prefix length was not a multiple of four. The decode helper places
+  the input in an exact-size heap buffer so that, under AddressSanitizer, any
+  read past the input (as the old decoder did) is caught.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+ */
+
+#include <cstddef> // size_t, used by ink_base64.h
+
+#include <tscore/ink_base64.h>
+
+#include <catch2/catch_test_macros.hpp>
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <random>
+#include <string>
+#include <vector>
+
+namespace
+{
+// Decode `b64` with the input held in an EXACT-size heap buffer, so a read
+// past the input trips AddressSanitizer's red zone (this is what the
+// out-of-bounds bug did for prefixes whose length is not a multiple of four).
+std::string
+decode_tight(const std::string &b64)
+{
+  std::vector<char> in(b64.size() ? b64.size() : 1);
+  if (!b64.empty()) {
+    std::memcpy(in.data(), b64.data(), b64.size());
+  }
+
+  // ats_base64_decode_dstlen() already includes the trailing NUL, so this is
+  // the exact documented minimum -- no slack, so any write at or past the
+  // bound trips ASan.
+  std::vector<unsigned char> out(ats_base64_decode_dstlen(b64.size()), 0xCC);
+  size_t                     n  = 0;
+  bool                       ok = ats_base64_decode(in.data(), b64.size(), 
out.data(), out.size(), &n);
+  REQUIRE(ok);
+  REQUIRE(out[n] == '\0'); // trailing NUL contract
+  return std::string(reinterpret_cast<char *>(out.data()), n);
+}
+
+std::string
+encode(const std::string &bin)
+{
+  std::vector<char> out(ats_base64_encode_dstlen(bin.size()), '\0');
+  size_t            n  = 0;
+  bool              ok = ats_base64_encode(bin.data(), bin.size(), out.data(), 
out.size(), &n);
+  REQUIRE(ok);
+  REQUIRE(out[n] == '\0');
+  return std::string(out.data(), n);
+}
+
+const std::string kAlpha = 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+} // namespace
+
+TEST_CASE("ats_base64 known vectors", "[base64]")
+{
+  // RFC 4648 §10.
+  CHECK(encode("") == "");
+  CHECK(encode("f") == "Zg==");
+  CHECK(encode("fo") == "Zm8=");
+  CHECK(encode("foo") == "Zm9v");
+  CHECK(encode("foob") == "Zm9vYg==");
+  CHECK(encode("fooba") == "Zm9vYmE=");
+  CHECK(encode("foobar") == "Zm9vYmFy");
+
+  CHECK(decode_tight("Zm9vYmFy") == "foobar");
+}
+
+TEST_CASE("ats_base64_decode does not read past a non-4-aligned prefix", 
"[base64]")
+{
+  // Regression: a decodable prefix whose length is not a multiple of four made
+  // the old decoder run an extra loop iteration past the prefix (out of bounds
+  // of the input when the buffer ends there) and read inBuffer[-2]. These
+  // unpadded inputs exercise prefix lengths 2, 3, 6, 7 (i.e. 2 and 3 mod 4);
+  // decode_tight runs them in exact-size buffers so ASan would catch any
+  // over-read.
+  CHECK(decode_tight("") == "");
+  CHECK(decode_tight("Zg") == "f");          // 2 chars  -> 1 byte
+  CHECK(decode_tight("Zm8") == "fo");        // 3 chars  -> 2 bytes
+  CHECK(decode_tight("Zm9v") == "foo");      // 4 chars  -> 3 bytes
+  CHECK(decode_tight("Zm9vYg") == "foob");   // 6 chars  -> 4 bytes
+  CHECK(decode_tight("Zm9vYmE") == "fooba"); // 7 chars  -> 5 bytes
+
+  // A lone trailing character does not encode a full byte and is dropped.
+  CHECK(decode_tight("Zm9vYg==").size() == 4);
+  CHECK(decode_tight("Q") == "");
+
+  // Sweep every prefix length (hence every length mod 4) in an exact-size
+  // buffer; assert the decoded length matches the RFC formula for that many
+  // alphabet characters and rely on ASan for over-read detection.
+  for (size_t len = 0; len <= 300; ++len) {
+    std::string s;
+    s.reserve(len);
+    for (size_t k = 0; k < len; ++k) {
+      s.push_back(kAlpha[(k * 7 + 3) % 64]);
+    }
+    const size_t rem      = len % 4;
+    const size_t expected = (len / 4) * 3 + (rem ? rem - 1 : 0);
+    INFO("len=" << len);
+    CHECK(decode_tight(s).size() == expected);
+  }
+}
+
+TEST_CASE("ats_base64 round-trips across sizes", "[base64]")
+{
+  std::mt19937                            rng(20240601);
+  std::uniform_int_distribution<unsigned> byte(0, 255);
+
+  for (size_t n = 0; n <= 256; ++n) {
+    std::string bin(n, '\0');
+    for (auto &c : bin) {
+      c = static_cast<char>(byte(rng));
+    }
+    INFO("size=" << n);
+    CHECK(decode_tight(encode(bin)) == bin);
+  }
+}
+
+TEST_CASE("ats_base64_decode accepts the URL-safe alphabet", "[base64]")
+{
+  // '-' and '_' map to the same six-bit values as '+' and '/', so a URL-safe
+  // string decodes to the same bytes as its standard-alphabet equivalent.
+  CHECK(decode_tight("____") == decode_tight("////"));
+  CHECK(decode_tight("----") == decode_tight("++++"));
+
+  // Round-trip through the URL-safe alphabet: encode (standard), translate
+  // '+'/'/' to '-'/'_', decode, and expect the original bytes back.
+  std::mt19937                            rng(99);
+  std::uniform_int_distribution<unsigned> byte(0, 255);
+  for (size_t n = 0; n <= 200; ++n) {
+    std::string bin(n, '\0');
+    for (auto &c : bin) {
+      c = static_cast<char>(byte(rng));
+    }
+    std::string url = encode(bin);
+    for (auto &c : url) {
+      if (c == '+') {
+        c = '-';
+      } else if (c == '/') {
+        c = '_';
+      }
+    }
+    INFO("size=" << n);
+    CHECK(decode_tight(url) == bin);
+  }
+}
+
+TEST_CASE("ats_base64_decode accepts both alphabets mixed in one input", 
"[base64]")
+{
+  // The standard and URL-safe punctuation may appear in the same input.
+  std::mt19937                            rng(1234);
+  std::uniform_int_distribution<unsigned> byte(0, 255);
+  std::uniform_int_distribution<int>      coin(0, 1);
+  for (size_t n = 0; n <= 200; ++n) {
+    std::string bin(n, '\0');
+    for (auto &c : bin) {
+      c = static_cast<char>(byte(rng));
+    }
+    std::string enc = encode(bin);
+    for (auto &c : enc) {
+      if (c == '+' && coin(rng)) {
+        c = '-';
+      } else if (c == '/' && coin(rng)) {
+        c = '_';
+      }
+    }
+    INFO("size=" << n);
+    CHECK(decode_tight(enc) == bin);
+  }
+}
+
+TEST_CASE("ats_base64_decode truncates at the first non-alphabet byte", 
"[base64]")
+{
+  // A non-alphabet byte (whitespace, '=', or other garbage) ends the decodable
+  // input; decoding stops there and yields the decode of the prefix before it.
+  // Injecting it at every position also exercises the over-read fix under 
ASan.
+  const char                        *terminators = " \t\n\r=*@";
+  std::mt19937                       rng(55);
+  std::uniform_int_distribution<int> pick(0, 63);
+  for (size_t len : {1u, 2u, 3u, 4u, 5u, 7u, 8u, 16u, 17u, 31u, 33u, 64u, 
100u}) {
+    std::string base;
+    for (size_t k = 0; k < len; ++k) {
+      base.push_back(kAlpha[pick(rng)]);
+    }
+    for (size_t pos = 0; pos < len; ++pos) {
+      for (const char *t = terminators; *t; ++t) {
+        std::string s = base;
+        s[pos]        = *t;
+        INFO("len=" << len << " pos=" << pos << " term=" << 
static_cast<int>(*t));
+        CHECK(decode_tight(s) == decode_tight(base.substr(0, pos)));
+      }
+    }
+  }
+}
+
+TEST_CASE("ats_base64_decode supports in-place (dst == src)", "[base64]")
+{
+  std::mt19937                            rng(7);
+  std::uniform_int_distribution<unsigned> byte(0, 255);
+
+  for (size_t n : {0u, 1u, 2u, 3u, 10u, 33u, 48u, 64u, 100u, 257u}) {
+    std::string bin(n, '\0');
+    for (auto &c : bin) {
+      c = static_cast<char>(byte(rng));
+    }
+    const std::string enc    = encode(bin);
+    const std::string expect = decode_tight(enc);
+
+    // Large enough to hold both the input copied in and the decoded output.
+    std::vector<char> buf(std::max(enc.size(), 
ats_base64_decode_dstlen(enc.size())), '\0');
+    std::copy(enc.begin(), enc.end(), buf.begin());
+    size_t n_out = 0;
+    bool   ok    = ats_base64_decode(buf.data(), enc.size(), 
reinterpret_cast<unsigned char *>(buf.data()), buf.size(), &n_out);
+    INFO("size=" << n);
+    CHECK(ok);
+    CHECK(std::string(buf.data(), n_out) == expect);
+  }
+}
+
+TEST_CASE("ats_base64 rejects undersized output buffers", "[base64]")
+{
+  const std::string bin = "hello world, base64";
+  const std::string enc = encode(bin);
+  size_t            n   = 0;
+
+  std::vector<char> small_enc(ats_base64_encode_dstlen(bin.size()) - 1);
+  CHECK_FALSE(ats_base64_encode(bin.data(), bin.size(), small_enc.data(), 
small_enc.size(), &n));
+
+  std::vector<unsigned char> small_dec(ats_base64_decode_dstlen(enc.size()) - 
1);
+  CHECK_FALSE(ats_base64_decode(enc.data(), enc.size(), small_dec.data(), 
small_dec.size(), &n));
+}

(trafficserver) 07/34: tscore: fix out-of-bounds read in ats_base64_decode (#13210)

Reply via email to