This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new 68fe3813 feat: add simple url encoder & decoder (#457)
68fe3813 is described below
commit 68fe381366338a5d86b13dbd611a0b3f10212905
Author: wzhuo <[email protected]>
AuthorDate: Mon Jan 5 20:48:42 2026 +0800
feat: add simple url encoder & decoder (#457)
---
src/iceberg/CMakeLists.txt | 1 +
src/iceberg/meson.build | 1 +
src/iceberg/test/CMakeLists.txt | 1 +
src/iceberg/test/meson.build | 1 +
src/iceberg/test/url_encoder_test.cc | 83 +++++++++++++++++++++++++++++++++++
src/iceberg/util/meson.build | 1 +
src/iceberg/util/url_encoder.cc | 84 ++++++++++++++++++++++++++++++++++++
src/iceberg/util/url_encoder.h | 53 +++++++++++++++++++++++
8 files changed, 225 insertions(+)
diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index 617ec05b..2ecd652f 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -97,6 +97,7 @@ set(ICEBERG_SOURCES
util/timepoint.cc
util/truncate_util.cc
util/type_util.cc
+ util/url_encoder.cc
util/uuid.cc)
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)
diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build
index 34538bde..78ebd604 100644
--- a/src/iceberg/meson.build
+++ b/src/iceberg/meson.build
@@ -118,6 +118,7 @@ iceberg_sources = files(
'util/timepoint.cc',
'util/truncate_util.cc',
'util/type_util.cc',
+ 'util/url_encoder.cc',
'util/uuid.cc',
)
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index a32bbe4d..e93852aa 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -108,6 +108,7 @@ add_iceberg_test(util_test
location_util_test.cc
string_util_test.cc
truncate_util_test.cc
+ url_encoder_test.cc
uuid_test.cc
visit_type_test.cc)
diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build
index 37818281..50422ccc 100644
--- a/src/iceberg/test/meson.build
+++ b/src/iceberg/test/meson.build
@@ -88,6 +88,7 @@ iceberg_tests = {
'location_util_test.cc',
'string_util_test.cc',
'truncate_util_test.cc',
+ 'url_encoder_test.cc',
'uuid_test.cc',
'visit_type_test.cc',
),
diff --git a/src/iceberg/test/url_encoder_test.cc b/src/iceberg/test/url_encoder_test.cc
new file mode 100644
index 00000000..fe22a9bb
--- /dev/null
+++ b/src/iceberg/test/url_encoder_test.cc
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/url_encoder.h"
+
+#include <gtest/gtest.h>
+
+#include "iceberg/test/matchers.h"
+
+namespace iceberg {
+
+TEST(UrlEncoderTest, Encode) {
+  // RFC 3986 unreserved characters should not be encoded
+  EXPECT_THAT(UrlEncoder::Encode("abc123XYZ"), ::testing::Eq("abc123XYZ"));
+  EXPECT_THAT(UrlEncoder::Encode("test-file_name.txt~backup"),
+              ::testing::Eq("test-file_name.txt~backup"));
+
+  // Spaces and special characters should be encoded
+  EXPECT_THAT(UrlEncoder::Encode("hello world"), ::testing::Eq("hello%20world"));
+  EXPECT_THAT(UrlEncoder::Encode("test@example.com"),
+              ::testing::Eq("test%40example.com"));
+  EXPECT_THAT(UrlEncoder::Encode("path/to/file"), ::testing::Eq("path%2Fto%2Ffile"));
+  EXPECT_THAT(UrlEncoder::Encode("key=value&foo=bar"),
+              ::testing::Eq("key%3Dvalue%26foo%3Dbar"));
+  EXPECT_THAT(UrlEncoder::Encode("100%"), ::testing::Eq("100%25"));
+  EXPECT_THAT(UrlEncoder::Encode("hello\x1fworld"), ::testing::Eq("hello%1Fworld"));
+  EXPECT_THAT(UrlEncoder::Encode(""), ::testing::Eq(""));
+}
+
+TEST(UrlEncoderTest, Decode) {
+  // Decode percent-encoded strings (hex digits are accepted in either case)
+  EXPECT_THAT(UrlEncoder::Decode("hello%20world"), ::testing::Eq("hello world"));
+  EXPECT_THAT(UrlEncoder::Decode("test%40example.com"),
+              ::testing::Eq("test@example.com"));
+  EXPECT_THAT(UrlEncoder::Decode("path%2fto%2Ffile"), ::testing::Eq("path/to/file"));
+  EXPECT_THAT(UrlEncoder::Decode("key%3dvalue%26foo%3Dbar"),
+              ::testing::Eq("key=value&foo=bar"));
+  EXPECT_THAT(UrlEncoder::Decode("100%25"), ::testing::Eq("100%"));
+
+  // ASCII Unit Separator (0x1F)
+  EXPECT_THAT(UrlEncoder::Decode("hello%1Fworld"), ::testing::Eq("hello\x1Fworld"));
+
+  // Unreserved characters remain unchanged
+  EXPECT_THAT(UrlEncoder::Decode("test-file_name.txt~backup"),
+              ::testing::Eq("test-file_name.txt~backup"));
+  EXPECT_THAT(UrlEncoder::Decode(""), ::testing::Eq(""));
+}
+
+TEST(UrlEncoderTest, EncodeDecodeRoundTrip) {
+  // Decode(Encode(x)) must reproduce x for each of these inputs.
+  std::vector<std::string> test_cases = {"hello world",
+                                         "test@example.com",
+                                         "path/to/file",
+                                         "key=value&foo=bar",
+                                         "100%",
+                                         "hello\x1Fworld",
+                                         "special!@#$%^&*()chars",
+                                         "mixed-123_test.file~ok",
+                                         ""};
+
+  for (const auto& test : test_cases) {
+    std::string encoded = UrlEncoder::Encode(test);
+    std::string decoded = UrlEncoder::Decode(encoded);
+    EXPECT_EQ(decoded, test) << "Round-trip failed for: " << test;
+  }
+}
+
+} // namespace iceberg
diff --git a/src/iceberg/util/meson.build b/src/iceberg/util/meson.build
index 880f6340..b3866b70 100644
--- a/src/iceberg/util/meson.build
+++ b/src/iceberg/util/meson.build
@@ -38,6 +38,7 @@ install_headers(
'timepoint.h',
'truncate_util.h',
'type_util.h',
+ 'url_encoder.h',
'uuid.h',
'visitor_generate.h',
'visit_type.h',
diff --git a/src/iceberg/util/url_encoder.cc b/src/iceberg/util/url_encoder.cc
new file mode 100644
index 00000000..a6ae4fd8
--- /dev/null
+++ b/src/iceberg/util/url_encoder.cc
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/url_encoder.h"
+
+#include <locale>
+
+namespace iceberg {
+
+namespace {
+
+// Returns true when `c` is one of the characters Encode() leaves untouched:
+// an ASCII letter, an ASCII digit, or one of '-', '.', '_', '~'.
+bool IsUnreserved(unsigned char c) {
+  if (c >= 'a' && c <= 'z') {
+    return true;
+  }
+  if (c >= 'A' && c <= 'Z') {
+    return true;
+  }
+  if (c >= '0' && c <= '9') {
+    return true;
+  }
+  switch (c) {
+    case '-':
+    case '.':
+    case '_':
+    case '~':
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Maps one hexadecimal digit ('0'-'9', 'A'-'F' or 'a'-'f') to its numeric
+// value in [0, 15]. Any other character yields -1.
+constexpr int8_t FromHex(char c) {
+  if ('0' <= c && c <= '9') {
+    return static_cast<int8_t>(c - '0');
+  }
+  if ('A' <= c && c <= 'F') {
+    return static_cast<int8_t>(10 + (c - 'A'));
+  }
+  if ('a' <= c && c <= 'f') {
+    return static_cast<int8_t>(10 + (c - 'a'));
+  }
+  return static_cast<int8_t>(-1);
+}
+
+} // namespace
+
+// Percent-encodes `str_to_encode` byte by byte: unreserved characters are
+// copied through, every other byte becomes "%XX" with upper-case hex digits.
+std::string UrlEncoder::Encode(std::string_view str_to_encode) {
+  static constexpr char kHexChars[] = "0123456789ABCDEF";
+  std::string result;
+  result.reserve(str_to_encode.size() * 3 / 2 /* Heuristic reservation */);
+
+  for (const char c : str_to_encode) {
+    // Work on the unsigned byte value. Where plain `char` is signed, bytes
+    // >= 0x80 (e.g. UTF-8 sequences) are negative, so shifting `c` directly
+    // would produce a negative index into kHexChars.
+    const auto byte = static_cast<unsigned char>(c);
+    if (IsUnreserved(byte)) {
+      result += c;
+    } else {
+      result += '%';
+      result += kHexChars[byte >> 4];
+      result += kHexChars[byte & 0xF];
+    }
+  }
+
+  return result;
+}
+
+// Reverses percent-encoding: each "%XX" with two hex digits becomes the byte
+// it denotes; everything else, including a malformed or truncated '%'
+// sequence, is copied through unchanged.
+std::string UrlEncoder::Decode(std::string_view str_to_decode) {
+  const size_t length = str_to_decode.size();
+  std::string decoded;
+  decoded.reserve(length);
+
+  size_t pos = 0;
+  while (pos < length) {
+    const char current = str_to_decode[pos];
+    // An escape needs two more characters after the '%'.
+    if (current == '%' && pos + 2 < length) {
+      const int8_t high = FromHex(str_to_decode[pos + 1]);
+      const int8_t low = FromHex(str_to_decode[pos + 2]);
+      if (high != -1 && low != -1) {
+        decoded += static_cast<char>((high << 4) | low);
+        pos += 3;  // consume '%' and both hex digits
+        continue;
+      }
+    }
+    // Not a valid %XX sequence, copy as-is
+    decoded += current;
+    ++pos;
+  }
+
+  return decoded;
+}
+
+} // namespace iceberg
diff --git a/src/iceberg/util/url_encoder.h b/src/iceberg/util/url_encoder.h
new file mode 100644
index 00000000..c6b8dd70
--- /dev/null
+++ b/src/iceberg/util/url_encoder.h
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <string>
+#include <string_view>
+
+#include "iceberg/iceberg_export.h"
+
+/// \file iceberg/util/url_encoder.h
+/// \brief URL encoding and decoding.
+
+namespace iceberg {
+
+/// \brief Utilities for percent-encoding and percent-decoding strings used in URLs.
+class ICEBERG_EXPORT UrlEncoder {
+ public:
+ /// \brief URL-encode a string.
+ ///
+ /// \details This is a simple implementation of url-encode
+ /// - Unreserved characters ([A-Z], [a-z], [0-9], "-", "_", ".", "~") are
+ ///   copied unchanged.
+ /// - Space is encoded as "%20" (unlike Java's URLEncoder which uses "+").
+ /// - All other characters are percent-encoded (%XX, upper-case hex digits).
+ /// \param str_to_encode The string to encode.
+ /// \return The URL-encoded string.
+ static std::string Encode(std::string_view str_to_encode);
+
+ /// \brief URL-decode a string.
+ ///
+ /// \details Decodes percent-encoded characters (e.g., "%20" -> space).
+ /// Hex digits are accepted in upper or lower case. A '%' that is not
+ /// followed by two hex digits is copied to the output unchanged.
+ /// \param str_to_decode The encoded string to decode.
+ /// \return The decoded string.
+ static std::string Decode(std::string_view str_to_decode);
+};
+
+} // namespace iceberg