This is an automated email from the ASF dual-hosted git repository.
zhangstar333 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 0b29a76838d [Opt](exec) use libbase64 to replace base64 code in doris (#32078)
0b29a76838d is described below
commit 0b29a76838d9fb7ecebd975440afb23cf37f18db
Author: HappenLee <[email protected]>
AuthorDate: Wed Mar 13 10:23:39 2024 +0800
[Opt](exec) use libbase64 to replace base64 code in doris (#32078)
* [Opt](exec) use libbase64 to replace base64 code in doris
---
be/cmake/thirdparty.cmake | 1 +
be/src/exec/olap_utils.h | 36 -----
be/src/util/url_coding.cpp | 175 +++------------------
be/src/vec/functions/function_bitmap.cpp | 6 +-
be/src/vec/functions/function_string.cpp | 6 +-
.../load_p0/stream_load/test_stream_load.groovy | 2 +-
.../test_stream_load_move_memtable.groovy | 8 +-
7 files changed, 35 insertions(+), 199 deletions(-)
diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake
index 0d485b2466e..e9fbdabee8b 100644
--- a/be/cmake/thirdparty.cmake
+++ b/be/cmake/thirdparty.cmake
@@ -117,6 +117,7 @@ add_thirdparty(bitshuffle)
add_thirdparty(roaring)
add_thirdparty(fmt)
add_thirdparty(cctz)
+add_thirdparty(base64)
add_thirdparty(aws-cpp-sdk-core LIB64)
add_thirdparty(aws-cpp-sdk-s3 LIB64)
diff --git a/be/src/exec/olap_utils.h b/be/src/exec/olap_utils.h
index 4024337c462..2e101b1270f 100644
--- a/be/src/exec/olap_utils.h
+++ b/be/src/exec/olap_utils.h
@@ -61,42 +61,6 @@ public:
}
};
-static char encoding_table[] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
'J', 'K', 'L', 'M',
- 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
'W', 'X', 'Y', 'Z',
- 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
'j', 'k', 'l', 'm',
- 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z',
- '0', '1', '2', '3', '4', '5', '6', '7', '8',
'9', '+', '/'};
-
-static int mod_table[] = {0, 2, 1};
-static const char base64_pad = '=';
-
-inline size_t base64_encode(const char* data, size_t length, char*
encoded_data) {
- size_t output_length = (size_t)(4.0 * ceil((double)length / 3.0));
-
- if (encoded_data == nullptr) {
- return 0;
- }
-
- for (uint32_t i = 0, j = 0; i < length;) {
- uint32_t octet_a = i < length ? (unsigned char)data[i++] : 0;
- uint32_t octet_b = i < length ? (unsigned char)data[i++] : 0;
- uint32_t octet_c = i < length ? (unsigned char)data[i++] : 0;
-
- uint32_t triple = (octet_a << 0x10) + (octet_b << 0x08) + octet_c;
-
- encoded_data[j++] = encoding_table[(triple >> 3 * 6) & 0x3F];
- encoded_data[j++] = encoding_table[(triple >> 2 * 6) & 0x3F];
- encoded_data[j++] = encoding_table[(triple >> 1 * 6) & 0x3F];
- encoded_data[j++] = encoding_table[(triple >> 0 * 6) & 0x3F];
- }
-
- for (int i = 0; i < mod_table[length % 3]; i++) {
- encoded_data[output_length - 1 - i] = base64_pad;
- }
-
- return output_length;
-}
-
enum SQLFilterOp {
FILTER_LARGER = 0,
FILTER_LARGER_OR_EQUAL = 1,
diff --git a/be/src/util/url_coding.cpp b/be/src/util/url_coding.cpp
index 7d6e264d5e9..6ddd4c05401 100644
--- a/be/src/util/url_coding.cpp
+++ b/be/src/util/url_coding.cpp
@@ -17,6 +17,7 @@
#include "util/url_coding.h"
+#include <libbase64.h>
#include <math.h>
#include <memory>
@@ -86,165 +87,35 @@ bool url_decode(const std::string& in, std::string* out) {
return true;
}
-static void encode_base64_internal(const std::string& in, std::string* out,
- const unsigned char* basis, bool padding) {
- size_t len = in.size();
- // Every 3 source bytes will be encoded into 4 bytes.
- out->resize((len + 2) / 3 * 4);
- unsigned char* d = (unsigned char*)out->data();
- const auto* s = reinterpret_cast<const unsigned char*>(in.data());
- while (len > 2) {
- *d++ = basis[(s[0] >> 2) & 0x3f];
- *d++ = basis[((s[0] & 3) << 4) | (s[1] >> 4)];
- *d++ = basis[((s[1] & 0x0f) << 2) | (s[2] >> 6)];
- *d++ = basis[s[2] & 0x3f];
-
- s += 3;
- len -= 3;
- }
- if (len) {
- *d++ = basis[(s[0] >> 2) & 0x3f];
- if (len == 1) {
- *d++ = basis[(s[0] & 3) << 4];
- if (padding) {
- *d++ = '=';
- }
- } else {
- *d++ = basis[((s[0] & 3) << 4) | (s[1] >> 4)];
- *d++ = basis[(s[1] & 0x0f) << 2];
- }
- if (padding) {
- *d++ = '=';
- }
- }
- out->resize((char*)d - out->data());
-}
-
void base64_encode(const std::string& in, std::string* out) {
- static unsigned char basis64[] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
- encode_base64_internal(in, out, basis64, true);
+ out->resize(in.length() * (4.0 / 3) + 1);
+ auto len = base64_encode(reinterpret_cast<const unsigned
char*>(in.c_str()), in.length(),
+ (unsigned char*)out->c_str());
+ out->resize(len);
}
-static char encoding_table[] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
'J', 'K', 'L', 'M',
- 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
'W', 'X', 'Y', 'Z',
- 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
'j', 'k', 'l', 'm',
- 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z',
- '0', '1', '2', '3', '4', '5', '6', '7', '8',
'9', '+', '/'};
-
-static const char base64_pad = '=';
-
-static short decoding_table[256] = {
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2, -2,
-2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, 62,
- -2, -2, -2, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2,
-2, -2, -2, -2, 0,
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22,
- 23, 24, 25, -2, -2, -2, -2, -2, -2, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38,
- 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2,
-2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2};
-
-static int mod_table[] = {0, 2, 1};
-
size_t base64_encode(const unsigned char* data, size_t length, unsigned char*
encoded_data) {
- auto output_length = (size_t)(4.0 * ceil((double)length / 3.0));
-
- if (encoded_data == nullptr) {
- return 0;
- }
-
- for (uint32_t i = 0, j = 0; i < length;) {
- uint32_t octet_a = i < length ? data[i++] : 0;
- uint32_t octet_b = i < length ? data[i++] : 0;
- uint32_t octet_c = i < length ? data[i++] : 0;
- uint32_t triple = (octet_a << 0x10) + (octet_b << 0x08) + octet_c;
-
- encoded_data[j++] = encoding_table[(triple >> 3 * 6) & 0x3F];
- encoded_data[j++] = encoding_table[(triple >> 2 * 6) & 0x3F];
- encoded_data[j++] = encoding_table[(triple >> 1 * 6) & 0x3F];
- encoded_data[j++] = encoding_table[(triple >> 0 * 6) & 0x3F];
- }
-
- for (int i = 0; i < mod_table[length % 3]; i++) {
- encoded_data[output_length - 1 - i] = '=';
- }
-
- return output_length;
+ size_t encode_len = 0;
+#if defined(__aarch64__) || defined(_M_ARM64)
+ do_base64_encode(reinterpret_cast<const char*>(data), length,
+ reinterpret_cast<char*>(encoded_data), &encode_len,
BASE64_FORCE_NEON64);
+#else
+ do_base64_encode(reinterpret_cast<const char*>(data), length,
+ reinterpret_cast<char*>(encoded_data), &encode_len, 0);
+#endif
+ return encode_len;
}
int64_t base64_decode(const char* data, size_t length, char* decoded_data) {
- const char* current = data;
- int ch = 0;
- int i = 0;
- int j = 0;
- int k = 0;
-
- // run through the whole string, converting as we go
- while ((ch = *current++) != '\0' && length-- > 0) {
- if (ch >= 256 || ch < 0) {
- return -1;
- }
-
- if (ch == base64_pad) {
- if (*current != '=' && (i % 4) == 1) {
- return -1;
- }
- continue;
- }
-
- ch = decoding_table[ch];
- // a space or some other separator character, we simply skip over
- if (ch == -1) {
- continue;
- } else if (ch == -2) {
- return -1;
- }
-
- switch (i % 4) {
- case 0:
- decoded_data[j] = ch << 2;
- break;
- case 1:
- decoded_data[j++] |= ch >> 4;
- decoded_data[j] = (ch & 0x0f) << 4;
- break;
- case 2:
- decoded_data[j++] |= ch >> 2;
- decoded_data[j] = (ch & 0x03) << 6;
- break;
- case 3:
- decoded_data[j++] |= ch;
- break;
- default:
- break;
- }
-
- i++;
- }
-
- k = j;
- /* mop things up if we ended on a boundary */
- if (ch == base64_pad) {
- switch (i % 4) {
- case 1:
- return 0;
- case 2:
- k++;
- [[fallthrough]];
- case 3:
- decoded_data[k] = 0;
- default:
- break;
- }
- }
-
- decoded_data[j] = '\0';
-
- return j;
+ size_t decode_len = 0;
+#if defined(__aarch64__) || defined(_M_ARM64)
+ auto ret = do_base64_decode(reinterpret_cast<const char*>(data), length,
decoded_data,
+ &decode_len, BASE64_FORCE_NEON64);
+#else
+ auto ret = do_base64_decode(reinterpret_cast<const char*>(data), length,
decoded_data,
+ &decode_len, 0);
+#endif
+ return ret > 0 ? decode_len : -1;
}
bool base64_decode(const std::string& in, std::string* out) {
diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp
index d1f6cf432ee..4d77b85259f 100644
--- a/be/src/vec/functions/function_bitmap.cpp
+++ b/be/src/vec/functions/function_bitmap.cpp
@@ -282,7 +282,7 @@ struct BitmapFromBase64 {
decode_buff.resize(curr_decode_buff_len);
last_decode_buff_len = curr_decode_buff_len;
}
- int outlen = base64_decode(src_str, src_size, decode_buff.data());
+ auto outlen = base64_decode(src_str, src_size, decode_buff.data());
if (outlen < 0) {
res.emplace_back();
null_map[i] = 1;
@@ -1012,8 +1012,8 @@ struct BitmapToBase64 {
}
bitmap_val.write_to(ser_buff.data());
- int outlen = base64_encode((const unsigned char*)ser_buff.data(),
cur_ser_size,
- chars_data + encoded_offset);
+ auto outlen = base64_encode((const unsigned char*)ser_buff.data(),
cur_ser_size,
+ chars_data + encoded_offset);
DCHECK(outlen > 0);
encoded_offset += (int)(4.0 * ceil((double)cur_ser_size / 3.0));
diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp
index ce2c94b937b..4da6d11a5e1 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -817,7 +817,7 @@ struct ToBase64Impl {
dst = dst_uptr.get();
}
- int outlen = base64_encode((const unsigned char*)source, srclen,
(unsigned char*)dst);
+ auto outlen = base64_encode((const unsigned char*)source, srclen,
(unsigned char*)dst);
StringOP::push_value_string(std::string_view(dst, outlen), i,
dst_data, dst_offsets);
}
@@ -860,7 +860,7 @@ struct ToBase64OldImpl {
dst = dst_uptr.get();
}
- int outlen = base64_encode((const unsigned char*)source, srclen,
(unsigned char*)dst);
+ auto outlen = base64_encode((const unsigned char*)source, srclen,
(unsigned char*)dst);
StringOP::push_value_string(std::string_view(dst, outlen), i,
dst_data, dst_offsets);
}
@@ -902,7 +902,7 @@ struct FromBase64Impl {
dst_uptr.reset(new char[cipher_len]);
dst = dst_uptr.get();
}
- int outlen = base64_decode(source, srclen, dst);
+ auto outlen = base64_decode(source, srclen, dst);
if (outlen < 0) {
StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
diff --git a/regression-test/suites/load_p0/stream_load/test_stream_load.groovy b/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
index ee69cc47779..6c002b2d29b 100644
--- a/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
@@ -1030,7 +1030,7 @@ suite("test_stream_load", "p0") {
set 'column_separator', '|'
set 'columns', 'k1, k2, v1, v2, v3'
set 'strict_mode', 'true'
- set 'Authorization', 'Basic Y29tbW9uX3VzZXI6MTIzNDU2dGVzdCE='
+ set 'Authorization', 'Basic Y29tbW9uX3VzZXJAJyUnOjEyMzQ1NnRlc3Qh'
file 'test_auth.csv'
time 10000 // limit inflight 10s
diff --git a/regression-test/suites/load_p0/stream_load/test_stream_load_move_memtable.groovy b/regression-test/suites/load_p0/stream_load/test_stream_load_move_memtable.groovy
index e0d00120552..09d9e57bf6e 100644
--- a/regression-test/suites/load_p0/stream_load/test_stream_load_move_memtable.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_stream_load_move_memtable.groovy
@@ -877,8 +877,8 @@ suite("test_stream_load_move_memtable", "p0") {
PROPERTIES ("replication_allocation" = "tag.location.default: 1");
"""
- sql """create USER common_user1@'%' IDENTIFIED BY '123456test!'"""
- sql """GRANT LOAD_PRIV ON *.* TO 'common_user1'@'%';"""
+ sql """create USER ddd IDENTIFIED BY '123456test!'"""
+ sql """GRANT LOAD_PRIV ON *.* TO 'ddd';"""
streamLoad {
table "${tableName13}"
@@ -886,7 +886,7 @@ suite("test_stream_load_move_memtable", "p0") {
set 'column_separator', '|'
set 'columns', 'k1, k2, v1, v2, v3'
set 'strict_mode', 'true'
- set 'Authorization', 'Basic Y29tbW9uX3VzZXIxOjEyMzQ1NnRlc3Qh'
+ set 'Authorization', 'Basic ZGRkOjEyMzQ1NnRlc3Qh'
set 'memtable_on_sink_node', 'true'
file 'test_auth.csv'
@@ -906,7 +906,7 @@ suite("test_stream_load_move_memtable", "p0") {
}
sql "sync"
- sql """DROP USER 'common_user1'@'%'"""
+ sql """DROP USER 'ddd'"""
// test default value
def tableName14 = "test_default_value_mm"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]