This is an automated email from the ASF dual-hosted git repository.

hello-stephen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ac34f069c25 [test](be) add JsonbSerializeUtil::block_to_jsonb 
golden-file UT covering all row-store-supported types (#63448)
ac34f069c25 is described below

commit ac34f069c25df0474a21de04de9149ab90ba348d
Author: Chenyang Sun <[email protected]>
AuthorDate: Thu May 21 17:50:03 2026 +0800

    [test](be) add JsonbSerializeUtil::block_to_jsonb golden-file UT covering 
all row-store-supported types (#63448)
    
    The fixture covers every type whose SerDe implements
    write_one_cell_to_jsonb: all integer / float scalars, DATE, DATETIME,
    DATEV2, DATETIMEV2, TIMESTAMPTZ, TIMEV2, IPV4, IPV6, STRING, VARCHAR,
    CHAR, JSONB, DECIMALV2/32/64/128I/256, BITMAP, HLL, QUANTILE_STATE,
    ARRAY<INT>, MAP<STRING,STRING>, STRUCT<INT,STRING>, VARIANT, and
    Nullable<INT/STRING/BITMAP>.
    
    The CHAR assertions also pin down that block_to_jsonb is a pure byte
    passthrough — decoded cell lengths must equal the originally written
    lengths, NOT the schema-declared CHAR(8) capacity.
---
 be/src/storage/row_cursor.cpp                      |   11 +
 be/test/core/jsonb/serialize_test.cpp              | 1060 ++++++++++++++++++++
 .../test_data/block_to_jsonb_all_types_golden.bin  |  Bin 0 -> 1586 bytes
 3 files changed, 1071 insertions(+)

diff --git a/be/src/storage/row_cursor.cpp b/be/src/storage/row_cursor.cpp
index e98112beddb..e0fb9ade80f 100644
--- a/be/src/storage/row_cursor.cpp
+++ b/be/src/storage/row_cursor.cpp
@@ -200,6 +200,17 @@ void RowCursor::_encode_column_value(const TabletColumn* 
column, const Field& va
     }
 }
 
+// Encodes the first `num_keys` key columns as a memcomparable byte string.
+// Each slot is [marker][value bytes]. A padding marker sits at the position that
+// real entries fill with KEY_NORMAL_MARKER (0x02), so a marker byte < 0x02 there
+// sorts strictly before, and a marker byte > 0x02 strictly after, every real
+// entry — independent of the value bytes.
+//
+// Examples — PK (a STRING, b STRING), stored entry (foo, bar) encodes as
+// `02 foo | 02 bar`. Calls with num_keys=2 and only partial key "foo":
+//
+//   padding_minimal=true                  -> 02 foo | 00          (MINIMAL)
+//   padding_minimal=false, is_mow=false   -> 02 foo | FF          (MAXIMAL)
+//   padding_minimal=false, is_mow=true    -> 02 foo | 03      (NORMAL_NEXT)
 template <bool is_mow>
 void RowCursor::encode_key_with_padding(std::string* buf, size_t num_keys,
                                         bool padding_minimal) const {
diff --git a/be/test/core/jsonb/serialize_test.cpp 
b/be/test/core/jsonb/serialize_test.cpp
index 2ca92d85837..2419383b0ed 100644
--- a/be/test/core/jsonb/serialize_test.cpp
+++ b/be/test/core/jsonb/serialize_test.cpp
@@ -24,6 +24,7 @@
 #include <stdint.h>
 
 #include <cassert>
+#include <fstream>
 #include <iostream>
 #include <memory>
 #include <string>
@@ -44,34 +45,539 @@
 #include "core/column/column_nullable.h"
 #include "core/column/column_string.h"
 #include "core/column/column_struct.h"
+#include "core/column/column_variant.h"
 #include "core/column/column_vector.h"
 #include "core/data_type/data_type.h"
 #include "core/data_type/data_type_array.h"
 #include "core/data_type/data_type_bitmap.h"
+#include "core/data_type/data_type_date.h"
 #include "core/data_type/data_type_date_or_datetime_v2.h"
+#include "core/data_type/data_type_date_time.h"
 #include "core/data_type/data_type_decimal.h"
 #include "core/data_type/data_type_factory.hpp"
 #include "core/data_type/data_type_hll.h"
+#include "core/data_type/data_type_ipv4.h"
+#include "core/data_type/data_type_ipv6.h"
+#include "core/data_type/data_type_jsonb.h"
 #include "core/data_type/data_type_map.h"
 #include "core/data_type/data_type_nullable.h"
 #include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_quantilestate.h"
 #include "core/data_type/data_type_string.h"
 #include "core/data_type/data_type_struct.h"
+#include "core/data_type/data_type_time.h"
+#include "core/data_type/data_type_timestamptz.h"
+#include "core/data_type/data_type_variant.h"
 #include "core/data_type/define_primitive_type.h"
 #include "core/data_type_serde/data_type_serde.h"
 #include "core/field.h"
 #include "core/types.h"
 #include "core/value/bitmap_value.h"
 #include "core/value/hll.h"
+#include "core/value/jsonb_value.h"
+#include "core/value/quantile_state.h"
+#include "core/value/timestamptz_value.h"
 #include "core/value/vdatetime_value.h"
+#include "exec/common/variant_util.h"
 #include "exprs/aggregate/aggregate_function.h"
 #include "gtest/gtest_pred_impl.h"
 #include "runtime/descriptors.h"
 #include "storage/olap_common.h"
 #include "storage/tablet/tablet_schema.h"
+#include "testutil/test_util.h"
 
 namespace doris {
 
+// Golden-file path (same convention as be/test/util/jsonb_serialize_test.cpp):
+//   - source of truth: 
be/test/util/test_data/block_to_jsonb_all_types_golden.bin
+//   - run-be-ut.sh copies be/test/util/test_data/ into the build dir before 
tests run,
+//     so the runtime path under GetCurrentRunningDir() is 
/util/test_data/<file>.
+//
+// File format (u32 fields are the writer's raw in-memory bytes, i.e. little-endian on LE hosts):
+//   u32 num_rows
+//   for each row:
+//     u32 size_i
+//     <size_i> bytes : raw JSONB bytes for that row
+//
+// Regenerate (only after an intentional encoding change):
+//   DORIS_REGEN_JSONB_GOLDEN=1 ... ./doris_be_test \
+//       
--gtest_filter='BlockSerializeTest.GenerateAllRowStoreSupportedTypesGolden'
+//   then `git add` the regenerated file.
+inline static const std::string kBlockToJsonbGoldenRel =
+        "/util/test_data/block_to_jsonb_all_types_golden.bin"; // appended to GetCurrentRunningDir() by the verifier test
+
+// Generator-side helper: serializes every row of `jsonb_col` into the golden file at
+// `path` as a u32 row count followed by one (u32 size, raw bytes) record per row. The
+// u32 fields are written as the raw in-memory bytes of the integer.
+// Throws std::runtime_error if the file cannot be opened or the stream reports a
+// failure after closing. Only the gated regen test calls this; the verify path never does.
+static void dump_jsonb_column_to_file(const ColumnString& jsonb_col, const std::string& path) {
+    std::ofstream out(path, std::ios::binary | std::ios::trunc);
+    if (!out) {
+        throw std::runtime_error("Failed to open golden file for write: " + path);
+    }
+
+    // Emits one length/count field in the same raw form for the header and every record.
+    const auto put_u32 = [&out](uint32_t value) {
+        out.write(reinterpret_cast<const char*>(&value), sizeof(value));
+    };
+
+    const auto row_count = static_cast<uint32_t>(jsonb_col.size());
+    put_u32(row_count);
+    for (uint32_t row = 0; row < row_count; ++row) {
+        const auto cell = jsonb_col.get_data_at(row);
+        put_u32(static_cast<uint32_t>(cell.size));
+        out.write(cell.data, cell.size);
+    }
+
+    // Close explicitly so that a failed flush is visible in the stream state below.
+    out.close();
+    if (!out) {
+        throw std::runtime_error("Failed to write golden file: " + path);
+    }
+}
+
+// Verifier-side helper: rebuilds a ColumnString from the golden file. The returned
+// column mimics the on-disk row-store column that jsonb_to_block consumes.
+//
+// Expects the layout written by dump_jsonb_column_to_file: a u32 row count followed
+// by one (u32 size, <size> raw bytes) record per row.
+//
+// Throws std::runtime_error when the file cannot be opened, or when the row count,
+// a row's size field, or a row's payload cannot be read in full.
+static MutableColumnPtr load_jsonb_column_from_file(const std::string& path) {
+    std::ifstream ifs(path, std::ios::binary);
+    if (!ifs.is_open()) {
+        throw std::runtime_error(
+                "Golden file not found: " + path +
+                ". Regenerate with DORIS_REGEN_JSONB_GOLDEN=1 + the *Generate* test.");
+    }
+    uint32_t num_rows = 0;
+    // A short read sets failbit (together with eofbit), so test the stream itself.
+    if (!ifs.read(reinterpret_cast<char*>(&num_rows), sizeof(num_rows))) {
+        throw std::runtime_error("Truncated golden file (missing row count): " + path);
+    }
+    auto col = ColumnString::create();
+    for (uint32_t i = 0; i < num_rows; ++i) {
+        uint32_t sz = 0;
+        std::string buf;
+        // Read the size first and only allocate the payload buffer once the size is
+        // known to be valid; a failed size read must not fall through with sz == 0.
+        bool ok = static_cast<bool>(ifs.read(reinterpret_cast<char*>(&sz), sizeof(sz)));
+        if (ok) {
+            buf.resize(sz);
+            ok = static_cast<bool>(ifs.read(buf.data(), sz));
+        }
+        // Checking `!good() && !eof()` would miss truncation: hitting end-of-file in
+        // the middle of a read sets eofbit as well as failbit.
+        if (!ok) {
+            throw std::runtime_error("Truncated golden file at row " + std::to_string(i) + ": " +
+                                     path);
+        }
+        col->insert_data(buf.data(), sz);
+    }
+    return col;
+}
+
+// Shared: builds the deterministic Block + TabletSchema that covers every 
row-store
+// supported type (the union of types whose SerDe implements 
write_one_cell_to_jsonb
+// without throwing NOT_IMPLEMENTED): numeric scalars, decimals, dates / times, IPs, 
strings
+// (STRING / VARCHAR / CHAR / JSONB), BITMAP / HLL / QUANTILE_STATE, ARRAY / 
MAP /
+// STRUCT, VARIANT, and Nullable<> wrappers. Every column is filled with kNumRows (3) rows.
+//
+// Used by:
+//   (1) the generator test, which encodes this block via block_to_jsonb and 
writes
+//       the resulting bytes to the golden file.
+//   (2) the verifier test, which uses it as (a) the schema/data-type source 
needed
+//       by jsonb_to_block and (b) the ground-truth expected values to assert 
against
+//       the decoded block.
+static void build_all_row_store_types_block(Block& block, TabletSchema& 
schema) {
+    constexpr int kNumRows = 3;
+    int32_t cid = 1;
+    auto add = [&](const std::string& name, DataTypePtr type, MutableColumnPtr 
col) {
+        TabletColumn c;
+        c.set_name(name);
+        c.set_unique_id(cid++);
+        c.set_type(FieldType::OLAP_FIELD_TYPE_INT); // NOTE(review): set to INT for every column regardless of `type` — presumably only the unique id matters here; confirm
+        schema.append_column(c);
+        block.insert(ColumnWithTypeAndName(std::move(col), type, name));
+    };
+
+    // ---- numeric scalars ----
+    {
+        auto col = ColumnUInt8::create();
+        col->get_data() = {0, 1, 0};
+        add("c_bool", std::make_shared<DataTypeUInt8>(), std::move(col));
+    }
+    {
+        auto col = ColumnInt8::create();
+        col->get_data() = {-1, 0, 127};
+        add("c_tinyint", std::make_shared<DataTypeInt8>(), std::move(col));
+    }
+    {
+        auto col = ColumnInt16::create();
+        col->get_data() = {-32000, 0, 32000};
+        add("c_smallint", std::make_shared<DataTypeInt16>(), std::move(col));
+    }
+    {
+        auto col = ColumnInt32::create();
+        col->get_data() = {-1, 0, 1024};
+        add("c_int", std::make_shared<DataTypeInt32>(), std::move(col));
+    }
+    {
+        auto col = ColumnInt64::create();
+        col->get_data() = {-1, 0, 1L << 40}; // NOTE(review): 1L is a `long`; shifting by 40 assumes long is 64-bit
+        add("c_bigint", std::make_shared<DataTypeInt64>(), std::move(col));
+    }
+    {
+        auto col = ColumnInt128::create();
+        col->get_data() = {static_cast<Int128>(-1), static_cast<Int128>(0),
+                           static_cast<Int128>(1) << 100};
+        add("c_largeint", std::make_shared<DataTypeInt128>(), std::move(col));
+    }
+    {
+        auto col = ColumnFloat32::create();
+        col->get_data() = {-1.5f, 0.0f, 3.14f};
+        add("c_float", std::make_shared<DataTypeFloat32>(), std::move(col));
+    }
+    {
+        auto col = ColumnFloat64::create();
+        col->get_data() = {-1.5, 0.0, 3.14};
+        add("c_double", std::make_shared<DataTypeFloat64>(), std::move(col));
+    }
+
+    // ---- date / time ----
+    {
+        auto col = ColumnDateV2::create();
+        auto pack = [](int y, int m, int d) {
+            DateV2Value<DateV2ValueType> v;
+            v.unchecked_set_time(y, m, d, 0, 0, 0, 0);
+            return *reinterpret_cast<UInt32*>(&v); // reads the value object back as the UInt32 stored in the column
+        };
+        col->get_data() = {pack(2022, 6, 6), pack(2024, 12, 31), pack(2026, 5, 
20)};
+        add("c_datev2", std::make_shared<DataTypeDateV2>(), std::move(col));
+    }
+    {
+        auto col = ColumnDateTimeV2::create();
+        auto pack = [](int y, int mo, int d, int h, int mi, int s) {
+            DateV2Value<DateTimeV2ValueType> v;
+            v.unchecked_set_time(y, mo, d, h, mi, s, 0);
+            return *reinterpret_cast<UInt64*>(&v);
+        };
+        col->get_data() = {pack(2022, 6, 6, 12, 0, 0), pack(2024, 12, 31, 23, 
59, 59),
+                           pack(2026, 5, 20, 1, 2, 3)};
+        add("c_datetimev2", std::make_shared<DataTypeDateTimeV2>(0), 
std::move(col));
+    }
+    {
+        auto col = ColumnTimeV2::create();
+        col->get_data() = {0.0, 1500000.0, 3600000000.0};
+        add("c_timev2", std::make_shared<DataTypeTimeV2>(0), std::move(col));
+    }
+
+    // ---- legacy DATE / DATETIME (VecDateTimeValue-backed, Int64 storage) 
----
+    {
+        auto col = ColumnDate::create();
+        VecDateTimeValue v1;
+        VecDateTimeValue v2;
+        VecDateTimeValue v3;
+        v1.unchecked_set_time(2020, 1, 1, 0, 0, 0);
+        v2.unchecked_set_time(2022, 6, 15, 0, 0, 0);
+        v3.unchecked_set_time(2026, 5, 20, 0, 0, 0);
+        col->insert_value(v1);
+        col->insert_value(v2);
+        col->insert_value(v3);
+        add("c_date", std::make_shared<DataTypeDate>(), std::move(col));
+    }
+    {
+        auto col = ColumnDateTime::create();
+        VecDateTimeValue v1;
+        VecDateTimeValue v2;
+        VecDateTimeValue v3;
+        v1.unchecked_set_time(2020, 1, 1, 12, 30, 45);
+        v2.unchecked_set_time(2022, 6, 15, 23, 59, 59);
+        v3.unchecked_set_time(2026, 5, 20, 1, 2, 3);
+        col->insert_value(v1);
+        col->insert_value(v2);
+        col->insert_value(v3);
+        add("c_datetime", std::make_shared<DataTypeDateTime>(), 
std::move(col));
+    }
+    // ---- TIMESTAMPTZ (TimestampTzValue, UInt64 storage) ----
+    {
+        auto col = ColumnTimeStampTz::create();
+        TimestampTzValue v1;
+        TimestampTzValue v2;
+        TimestampTzValue v3;
+        v1.unchecked_set_time(2020, 1, 1, 12, 30, 45);
+        v2.unchecked_set_time(2024, 12, 31, 23, 59, 59);
+        v3.unchecked_set_time(2026, 5, 20, 0, 0, 0);
+        col->insert_value(v1);
+        col->insert_value(v2);
+        col->insert_value(v3);
+        add("c_timestamptz", std::make_shared<DataTypeTimeStampTz>(0), 
std::move(col));
+    }
+
+    // ---- IPs ----
+    {
+        auto col = ColumnIPv4::create();
+        col->get_data() = {static_cast<IPv4>(0), static_cast<IPv4>(0x7f000001),
+                           static_cast<IPv4>(0xc0a80101)};
+        add("c_ipv4", std::make_shared<DataTypeIPv4>(), std::move(col));
+    }
+    {
+        auto col = ColumnIPv6::create();
+        col->get_data() = {static_cast<IPv6>(0), static_cast<IPv6>(1),
+                           (static_cast<IPv6>(0x1234567890abcdefULL) << 64) |
+                                   static_cast<IPv6>(0xfedcba0987654321ULL)};
+        add("c_ipv6", std::make_shared<DataTypeIPv6>(), std::move(col));
+    }
+
+    // ---- strings ----
+    {
+        auto col = ColumnString::create();
+        col->insert_data("hello", 5);
+        col->insert_data("", 0);
+        col->insert_data("doris row store", 15);
+        add("c_string", std::make_shared<DataTypeString>(), std::move(col));
+    }
+    {
+        // VARCHAR: same SerDe / ColumnString as STRING.
+        auto col = ColumnString::create();
+        col->insert_data("vc-a", 4);
+        col->insert_data("vc-bb", 5);
+        col->insert_data("", 0);
+        add("c_varchar", std::make_shared<DataTypeString>(64, TYPE_VARCHAR), 
std::move(col));
+    }
+    {
+        // CHAR: row store does NOT pad to fixed length — bytes are 
passthrough.
+        auto col = ColumnString::create();
+        col->insert_data("ch-x", 4);
+        col->insert_data("", 0);
+        col->insert_data("ch-zzz", 6);
+        add("c_char", std::make_shared<DataTypeString>(8, TYPE_CHAR), 
std::move(col));
+    }
+    {
+        // JSONB: bytes must be valid JSONB binary (dump_data decodes via 
JsonbToJson).
+        auto col = ColumnString::create();
+        const char* payloads[] = {"{\"k\":1}", "[1,2,3]", "\"abc\""};
+        for (const auto* p : payloads) {
+            JsonBinaryValue jb;
+            THROW_IF_ERROR(jb.from_json_string(p, strlen(p)));
+            col->insert_data(jb.value(), jb.size());
+        }
+        add("c_jsonb", std::make_shared<DataTypeJsonb>(), std::move(col));
+    }
+
+    // ---- decimals ----
+    {
+        DataTypePtr t = doris::create_decimal(27, 9, true);
+        auto col = t->create_column();
+        auto& data = static_cast<ColumnDecimal128V2*>(col.get())->get_data();
+        for (int i = 1; i <= kNumRows; ++i) {
+            data.push_back(static_cast<__int128_t>(i) * 1000000000);
+        }
+        add("c_decimalv2", t, std::move(col));
+    }
+    {
+        DataTypePtr t = doris::create_decimal(9, 2, false);
+        auto col = t->create_column();
+        auto& data = static_cast<ColumnDecimal32*>(col.get())->get_data();
+        for (int i = 1; i <= kNumRows; ++i) {
+            data.push_back(i * 100);
+        }
+        add("c_decimal32", t, std::move(col));
+    }
+    {
+        DataTypePtr t = doris::create_decimal(18, 4, false);
+        auto col = t->create_column();
+        auto& data = static_cast<ColumnDecimal64*>(col.get())->get_data();
+        for (int i = 1; i <= kNumRows; ++i) {
+            data.push_back(static_cast<int64_t>(i) * 10000);
+        }
+        add("c_decimal64", t, std::move(col));
+    }
+    {
+        DataTypePtr t = doris::create_decimal(27, 9, false);
+        auto col = t->create_column();
+        auto& data = static_cast<ColumnDecimal128V3*>(col.get())->get_data();
+        for (int i = 1; i <= kNumRows; ++i) {
+            data.push_back(static_cast<__int128_t>(i) * 1000000000);
+        }
+        add("c_decimal128i", t, std::move(col));
+    }
+    {
+        DataTypePtr t = doris::create_decimal(76, 10, false);
+        auto col = t->create_column();
+        auto& data = static_cast<ColumnDecimal256*>(col.get())->get_data();
+        for (int i = 1; i <= kNumRows; ++i) {
+            data.push_back(wide::Int256(i) * wide::Int256(10000000000LL));
+        }
+        add("c_decimal256", t, std::move(col));
+    }
+
+    // ---- object types ----
+    {
+        DataTypePtr t = std::make_shared<DataTypeBitMap>();
+        auto col = t->create_column();
+        auto& container = static_cast<ColumnBitmap*>(col.get())->get_data();
+        for (int i = 0; i < kNumRows; ++i) {
+            BitmapValue bv;
+            for (int j = 0; j <= i; ++j) {
+                bv.add(j);
+            }
+            container.push_back(bv);
+        }
+        add("c_bitmap", t, std::move(col));
+    }
+    {
+        DataTypePtr t = std::make_shared<DataTypeHLL>();
+        auto col = t->create_column();
+        auto& container = static_cast<ColumnHLL*>(col.get())->get_data();
+        for (int i = 0; i < kNumRows; ++i) {
+            HyperLogLog hll;
+            hll.update(i);
+            container.push_back(hll);
+        }
+        add("c_hll", t, std::move(col));
+    }
+    {
+        DataTypePtr t = std::make_shared<DataTypeQuantileState>();
+        auto col = t->create_column();
+        auto& container = 
static_cast<ColumnQuantileState*>(col.get())->get_data();
+        for (int i = 0; i < kNumRows; ++i) {
+            QuantileState qs;
+            qs.add_value(static_cast<double>(i + 1));
+            container.push_back(qs);
+        }
+        add("c_quantilestate", t, std::move(col));
+    }
+
+    // ---- nested ARRAY<INT> ----
+    {
+        auto off = ColumnOffset64::create();
+        auto data = ColumnInt32::create();
+        std::vector<ColumnArray::Offset64> offs = {0, 2, 2, 4}; // 
[[1,2],[],[3,4]]
+        std::vector<int32_t> vals = {1, 2, 3, 4};
+        for (size_t i = 1; i < offs.size(); ++i) {
+            off->insert_data(reinterpret_cast<const char*>(&offs[i]), 0); // length 0 passed: presumably ignored for fixed-width columns — confirm
+        }
+        for (auto v : vals) {
+            data->insert_data(reinterpret_cast<const char*>(&v), 0);
+        }
+        DataTypePtr nested = make_nullable(std::make_shared<DataTypeInt32>());
+        DataTypePtr t = std::make_shared<DataTypeArray>(nested);
+        auto arr = ColumnArray::create(make_nullable(std::move(data)), 
std::move(off));
+        add("c_array_int", t, std::move(arr));
+    }
+
+    // ---- nested MAP<STRING, STRING> ----
+    {
+        DataTypePtr s = make_nullable(std::make_shared<DataTypeString>());
+        DataTypePtr t = std::make_shared<DataTypeMap>(s, s);
+        MutableColumnPtr col = t->create_column();
+        for (int i = 0; i < kNumRows; ++i) {
+            Array k;
+            Array v;
+            k.push_back(Field::create_field<TYPE_STRING>("k" + 
std::to_string(i)));
+            v.push_back(Field::create_field<TYPE_STRING>("v" + 
std::to_string(i)));
+            Map m;
+            m.push_back(Field::create_field<TYPE_ARRAY>(k));
+            m.push_back(Field::create_field<TYPE_ARRAY>(v));
+            col->insert(Field::create_field<TYPE_MAP>(m));
+        }
+        add("c_map", t, std::move(col));
+    }
+
+    // ---- nested STRUCT<INT, STRING> ----
+    {
+        DataTypePtr i = make_nullable(std::make_shared<DataTypeInt32>());
+        DataTypePtr s = make_nullable(std::make_shared<DataTypeString>());
+        DataTypePtr t = 
std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> {i, s});
+        MutableColumnPtr col = t->create_column();
+        for (int x = 0; x < kNumRows; ++x) {
+            Struct st;
+            st.push_back(Field::create_field<TYPE_INT>(x));
+            st.push_back(Field::create_field<TYPE_STRING>("name" + 
std::to_string(x)));
+            col->insert(Field::create_field<TYPE_STRUCT>(st));
+        }
+        add("c_struct", t, std::move(col));
+    }
+
+    // ---- VARIANT (parsed from JSON literals) ----
+    {
+        auto t = std::make_shared<DataTypeVariant>();
+        auto col = t->create_column();
+        auto json_col = ColumnString::create();
+        const char* jsons[] = {"{\"a\":1}", "{\"b\":\"hello\"}", "[1,2,3]"};
+        for (const auto* j : jsons) {
+            json_col->insert_data(j, strlen(j));
+        }
+        ParseConfig parse_config;
+        variant_util::parse_json_to_variant(*col, *json_col, parse_config);
+        add("c_variant", t, std::move(col));
+    }
+
+    // ---- Nullable<> wrappers ----
+    // Null cells are skipped on write; jsonb_to_block fills them back via 
insert_default.
+    {
+        DataTypePtr t = make_nullable(std::make_shared<DataTypeInt32>());
+        auto col = t->create_column();
+        col->insert_default();
+        col->insert(Field::create_field<TYPE_INT>(42));
+        col->insert_default();
+        add("c_nullable_int", t, std::move(col));
+    }
+    {
+        DataTypePtr t = make_nullable(std::make_shared<DataTypeString>());
+        auto col = t->create_column();
+        col->insert(Field::create_field<TYPE_STRING>("a"));
+        col->insert_default();
+        col->insert(Field::create_field<TYPE_STRING>("c"));
+        add("c_nullable_string", t, std::move(col));
+    }
+    {
+        DataTypePtr t = make_nullable(std::make_shared<DataTypeBitMap>());
+        auto col = t->create_column();
+        BitmapValue bv;
+        bv.add(7);
+        bv.add(9);
+        col->insert_default();
+        col->insert(Field::create_field<TYPE_BITMAP>(BitmapValue(bv)));
+        col->insert_default();
+        add("c_nullable_bitmap", t, std::move(col));
+    }
+
+    DCHECK_EQ(block.rows(), kNumRows); // sanity check: the assembled block must report kNumRows rows
+}
+
+// Verifier-side schema scaffold. Fills `dst` with EMPTY columns and `schema` with the
+// matching TabletColumns; column order, names, data types and unique ids (1, 2, 3, ...)
+// mirror build_all_row_store_types_block one-for-one, so the two functions must be
+// edited together. jsonb_to_block later fills rows into `dst` from the golden file.
+static void make_all_row_store_types_dst_block(Block& dst, TabletSchema& schema) {
+    int32_t next_uid = 1;
+
+    // Registers one column in both the TabletSchema and the destination block.
+    const auto register_column = [&dst, &schema, &next_uid](const std::string& col_name,
+                                                            DataTypePtr data_type) {
+        TabletColumn tablet_col;
+        tablet_col.set_name(col_name);
+        tablet_col.set_unique_id(next_uid++);
+        tablet_col.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+        schema.append_column(tablet_col);
+        dst.insert(ColumnWithTypeAndName(data_type->create_column(), data_type, col_name));
+    };
+
+    // Each call yields a fresh Nullable<> type instance, as the nested types need.
+    const auto nullable_int32 = [] { return make_nullable(std::make_shared<DataTypeInt32>()); };
+    const auto nullable_string = [] { return make_nullable(std::make_shared<DataTypeString>()); };
+
+    // numeric scalars
+    register_column("c_bool", std::make_shared<DataTypeUInt8>());
+    register_column("c_tinyint", std::make_shared<DataTypeInt8>());
+    register_column("c_smallint", std::make_shared<DataTypeInt16>());
+    register_column("c_int", std::make_shared<DataTypeInt32>());
+    register_column("c_bigint", std::make_shared<DataTypeInt64>());
+    register_column("c_largeint", std::make_shared<DataTypeInt128>());
+    register_column("c_float", std::make_shared<DataTypeFloat32>());
+    register_column("c_double", std::make_shared<DataTypeFloat64>());
+
+    // date / time
+    register_column("c_datev2", std::make_shared<DataTypeDateV2>());
+    register_column("c_datetimev2", std::make_shared<DataTypeDateTimeV2>(0));
+    register_column("c_timev2", std::make_shared<DataTypeTimeV2>(0));
+    register_column("c_date", std::make_shared<DataTypeDate>());
+    register_column("c_datetime", std::make_shared<DataTypeDateTime>());
+    register_column("c_timestamptz", std::make_shared<DataTypeTimeStampTz>(0));
+
+    // IPs
+    register_column("c_ipv4", std::make_shared<DataTypeIPv4>());
+    register_column("c_ipv6", std::make_shared<DataTypeIPv6>());
+
+    // strings
+    register_column("c_string", std::make_shared<DataTypeString>());
+    register_column("c_varchar", std::make_shared<DataTypeString>(64, TYPE_VARCHAR));
+    register_column("c_char", std::make_shared<DataTypeString>(8, TYPE_CHAR));
+    register_column("c_jsonb", std::make_shared<DataTypeJsonb>());
+
+    // decimals
+    register_column("c_decimalv2", doris::create_decimal(27, 9, true));
+    register_column("c_decimal32", doris::create_decimal(9, 2, false));
+    register_column("c_decimal64", doris::create_decimal(18, 4, false));
+    register_column("c_decimal128i", doris::create_decimal(27, 9, false));
+    register_column("c_decimal256", doris::create_decimal(76, 10, false));
+
+    // object types
+    register_column("c_bitmap", std::make_shared<DataTypeBitMap>());
+    register_column("c_hll", std::make_shared<DataTypeHLL>());
+    register_column("c_quantilestate", std::make_shared<DataTypeQuantileState>());
+
+    // nested types
+    register_column("c_array_int", std::make_shared<DataTypeArray>(nullable_int32()));
+    register_column("c_map", std::make_shared<DataTypeMap>(nullable_string(), nullable_string()));
+    register_column("c_struct", std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> {
+                                        nullable_int32(), nullable_string()}));
+    register_column("c_variant", std::make_shared<DataTypeVariant>());
+
+    // Nullable<> wrappers
+    register_column("c_nullable_int", nullable_int32());
+    register_column("c_nullable_string", nullable_string());
+    register_column("c_nullable_bitmap", make_nullable(std::make_shared<DataTypeBitMap>()));
+}
+
 static void fill_block_with_array_int(Block& block) {
     auto off_column = ColumnOffset64::create();
     auto data_column = ColumnInt32::create();
@@ -523,4 +1029,558 @@ TEST(BlockSerializeTest, JsonbBlock) {
     std::cout << new_block.dump_data() << std::endl;
     EXPECT_EQ(block.dump_data(), new_block.dump_data());
 }
+
+// Verifier (runs in every UT pass). Never calls block_to_jsonb — only jsonb_to_block.
+//
+// Flow:
+//   1. Build an EMPTY destination block with the right schema (no values).
+//   2. Read the golden file's raw JSONB bytes into a ColumnString.
+//   3. Decode via jsonb_to_block into the destination.
+//   4. For EACH column, compare the decoded cells, value by value, against hardcoded
+//      expected literals matching exactly what the generator wrote.
+//
+// The expected literals MUST be kept in sync with what build_all_row_store_types_block
+// writes; if you change the test data, also re-run the generator to refresh the
+// golden file and update the expected literals here.
+//
+// The golden_* helpers below only remove the repeated "fetch column, check size, loop
+// over rows" boilerplate; every expected value is still spelled out at the call site.
+namespace {
+
+// Returns column `name` of `block`, converted to ColumnType with assert_cast.
+template <typename ColumnType>
+const ColumnType& golden_column(const Block& block, const char* name) {
+    return assert_cast<const ColumnType&>(
+            *block.get_by_position(block.get_position_by_name(name)).column);
+}
+
+// Copies the object representation of `value` into an integer of identical size.
+// Used instead of `*reinterpret_cast<Raw*>(&value)`: reading an object through an
+// unrelated integer type violates the strict-aliasing rule (undefined behaviour),
+// whereas memcpy of the bytes is well defined and yields the same bits.
+template <typename Raw, typename Value>
+Raw golden_raw_bits(const Value& value) {
+    static_assert(sizeof(Raw) == sizeof(Value), "raw integer must be as wide as the value");
+    Raw raw {};
+    memcpy(&raw, &value, sizeof(raw));
+    return raw;
+}
+
+// Expects column `name` to hold exactly `expected`, comparing get_data()[row] per row.
+// A size mismatch is reported as a fatal failure of this helper only: the per-cell loop
+// for this column is skipped (no out-of-range reads) and the caller continues with the
+// next column.
+template <typename ColumnType, typename Expected>
+void expect_golden_cells(const Block& block, const char* name,
+                         const std::vector<Expected>& expected) {
+    SCOPED_TRACE(name);
+    const auto& col = golden_column<ColumnType>(block, name);
+    ASSERT_EQ(col.size(), expected.size());
+    for (size_t row = 0; row < expected.size(); ++row) {
+        EXPECT_EQ(col.get_data()[row], expected[row]) << "row " << row;
+    }
+}
+
+// Same as expect_golden_cells, but compares the `.value` data member of each cell.
+// Used for the decimal columns whose cells expose their raw integer as `.value`.
+template <typename ColumnType, typename Expected>
+void expect_golden_decimal_cells(const Block& block, const char* name,
+                                 const std::vector<Expected>& expected) {
+    SCOPED_TRACE(name);
+    const auto& col = golden_column<ColumnType>(block, name);
+    ASSERT_EQ(col.size(), expected.size());
+    for (size_t row = 0; row < expected.size(); ++row) {
+        EXPECT_EQ(col.get_data()[row].value, expected[row]) << "row " << row;
+    }
+}
+
+// Byte-exact check of a ColumnString column: both the length and the bytes of every
+// cell must equal `expected`. When `padded_where` is non-null, a length mismatch is
+// reported as "row N was padded <padded_where> (expected len=X, got Y)"; otherwise
+// only "row N" is appended to the failure.
+void expect_golden_string_cells(const Block& block, const char* name,
+                                const std::vector<std::string>& expected,
+                                const char* padded_where) {
+    SCOPED_TRACE(name);
+    const auto& col = golden_column<ColumnString>(block, name);
+    ASSERT_EQ(col.size(), expected.size());
+    for (size_t row = 0; row < expected.size(); ++row) {
+        const auto ref = col.get_data_at(row);
+        std::string size_note = "row " + std::to_string(row);
+        if (padded_where != nullptr) {
+            size_note += std::string(" was padded ") + padded_where +
+                         " (expected len=" + std::to_string(expected[row].size()) + ", got " +
+                         std::to_string(ref.size) + ")";
+        }
+        EXPECT_EQ(ref.size, expected[row].size()) << size_note;
+        EXPECT_EQ(std::string(ref.data, ref.size), expected[row]) << "row " << row;
+    }
+}
+
+} // namespace
+
+TEST(BlockSerializeTest, AllRowStoreSupportedTypes) {
+    // 1) Empty destination block matching the encoder's schema.
+    Block dst;
+    TabletSchema schema;
+    make_all_row_store_types_dst_block(dst, schema);
+
+    // 2) Load on-disk golden bytes (NOT generated on the fly).
+    const std::string golden_path = GetCurrentRunningDir() + kBlockToJsonbGoldenRel;
+    auto jsonb_col = load_jsonb_column_from_file(golden_path);
+    constexpr size_t kNumRows = 3;
+    ASSERT_EQ(jsonb_col->size(), kNumRows) << "golden file row count unexpected";
+
+    // 3) Decode.
+    // The uid -> position map below indexes schema.columns() with block positions, so
+    // the two must describe the same number of columns.
+    ASSERT_EQ(schema.columns().size(), dst.columns()) << "schema / block column count mismatch";
+    std::unordered_map<uint32_t, uint32_t> col_uid_to_idx;
+    std::vector<std::string> default_values(dst.columns());
+    for (uint32_t i = 0; i < dst.columns(); ++i) {
+        col_uid_to_idx[schema.columns()[i]->unique_id()] = i;
+    }
+    THROW_IF_ERROR(JsonbSerializeUtil::jsonb_to_block(
+            create_data_type_serdes(dst.get_data_types()),
+            assert_cast<const ColumnString&>(*jsonb_col), col_uid_to_idx, dst, default_values,
+            {}));
+
+    // 4) Per-column verification.
+
+    // --- ColumnVector scalar checks: compare raw native value at each row. ---
+    expect_golden_cells<ColumnUInt8>(dst, "c_bool", std::vector<UInt8> {0, 1, 0});
+    expect_golden_cells<ColumnInt8>(dst, "c_tinyint", std::vector<int8_t> {-1, 0, 127});
+    expect_golden_cells<ColumnInt16>(dst, "c_smallint", std::vector<int16_t> {-32000, 0, 32000});
+    expect_golden_cells<ColumnInt32>(dst, "c_int", std::vector<int32_t> {-1, 0, 1024});
+    // int64_t {1} rather than 1L: `long` is only 32 bits wide on some ABIs, where
+    // shifting it by 40 would be undefined.
+    expect_golden_cells<ColumnInt64>(dst, "c_bigint",
+                                     std::vector<int64_t> {-1, 0, int64_t {1} << 40});
+    expect_golden_cells<ColumnInt128>(
+            dst, "c_largeint",
+            std::vector<Int128> {static_cast<Int128>(-1), static_cast<Int128>(0),
+                                 static_cast<Int128>(1) << 100});
+    // Exact floating-point equality is intended: the values are expected to come back
+    // bit-identical, not merely close.
+    expect_golden_cells<ColumnFloat32>(dst, "c_float", std::vector<float> {-1.5f, 0.0f, 3.14f});
+    expect_golden_cells<ColumnFloat64>(dst, "c_double", std::vector<double> {-1.5, 0.0, 3.14});
+
+    // --- date / time / IP: raw storage int representation. ---
+    {
+        const auto pack = [](int y, int m, int d) {
+            DateV2Value<DateV2ValueType> v;
+            v.unchecked_set_time(y, m, d, 0, 0, 0, 0);
+            return golden_raw_bits<UInt32>(v);
+        };
+        expect_golden_cells<ColumnDateV2>(
+                dst, "c_datev2",
+                std::vector<UInt32> {pack(2022, 6, 6), pack(2024, 12, 31), pack(2026, 5, 20)});
+    }
+    {
+        const auto pack = [](int y, int mo, int d, int h, int mi, int s) {
+            DateV2Value<DateTimeV2ValueType> v;
+            v.unchecked_set_time(y, mo, d, h, mi, s, 0);
+            return golden_raw_bits<UInt64>(v);
+        };
+        expect_golden_cells<ColumnDateTimeV2>(
+                dst, "c_datetimev2",
+                std::vector<UInt64> {pack(2022, 6, 6, 12, 0, 0), pack(2024, 12, 31, 23, 59, 59),
+                                     pack(2026, 5, 20, 1, 2, 3)});
+    }
+    expect_golden_cells<ColumnTimeV2>(dst, "c_timev2",
+                                      std::vector<double> {0.0, 1500000.0, 3600000000.0});
+    {
+        // c_date: ColumnDate stores VecDateTimeValue. VecDateTimeValue::operator== compares
+        // the full date+time+type tuple, so building the same expected values is the
+        // cleanest cell-by-cell check.
+        std::vector<VecDateTimeValue> expected(kNumRows);
+        expected[0].unchecked_set_time(2020, 1, 1, 0, 0, 0);
+        expected[1].unchecked_set_time(2022, 6, 15, 0, 0, 0);
+        expected[2].unchecked_set_time(2026, 5, 20, 0, 0, 0);
+        expect_golden_cells<ColumnDate>(dst, "c_date", expected);
+    }
+    {
+        std::vector<VecDateTimeValue> expected(kNumRows);
+        expected[0].unchecked_set_time(2020, 1, 1, 12, 30, 45);
+        expected[1].unchecked_set_time(2022, 6, 15, 23, 59, 59);
+        expected[2].unchecked_set_time(2026, 5, 20, 1, 2, 3);
+        expect_golden_cells<ColumnDateTime>(dst, "c_datetime", expected);
+    }
+    {
+        // c_timestamptz: ColumnTimeStampTz stores TimestampTzValue (UInt64 underneath).
+        // Compare via the integer returned by to_date_int_val() on both sides.
+        SCOPED_TRACE("c_timestamptz");
+        const auto& col = golden_column<ColumnTimeStampTz>(dst, "c_timestamptz");
+        std::vector<TimestampTzValue> expected(kNumRows);
+        expected[0].unchecked_set_time(2020, 1, 1, 12, 30, 45);
+        expected[1].unchecked_set_time(2024, 12, 31, 23, 59, 59);
+        expected[2].unchecked_set_time(2026, 5, 20, 0, 0, 0);
+        ASSERT_EQ(col.size(), expected.size());
+        for (size_t i = 0; i < expected.size(); ++i) {
+            EXPECT_EQ(col.get_data()[i].to_date_int_val(), expected[i].to_date_int_val())
+                    << "row " << i;
+        }
+    }
+    expect_golden_cells<ColumnIPv4>(
+            dst, "c_ipv4",
+            std::vector<IPv4> {static_cast<IPv4>(0), static_cast<IPv4>(0x7f000001),
+                               static_cast<IPv4>(0xc0a80101)});
+    expect_golden_cells<ColumnIPv6>(
+            dst, "c_ipv6",
+            std::vector<IPv6> {static_cast<IPv6>(0), static_cast<IPv6>(1),
+                               (static_cast<IPv6>(0x1234567890abcdefULL) << 64) |
+                                       static_cast<IPv6>(0xfedcba0987654321ULL)});
+
+    // --- strings: byte-exact, including the CHAR no-padding invariant. ---
+    expect_golden_string_cells(dst, "c_string", {"hello", "", "doris row store"}, nullptr);
+    // VARCHAR(64) — sizes must equal originally written lengths, NOT 64.
+    expect_golden_string_cells(dst, "c_varchar", {"vc-a", "vc-bb", ""}, "somewhere");
+    // CHAR(8) — same no-padding invariant.
+    expect_golden_string_cells(dst, "c_char", {"ch-x", "", "ch-zzz"},
+                               "by block_to_jsonb / jsonb_to_block");
+    {
+        // JSONB — expected bytes are the JSONB-binary form of each original JSON literal.
+        SCOPED_TRACE("c_jsonb");
+        const auto& col = golden_column<ColumnString>(dst, "c_jsonb");
+        const char* payloads[kNumRows] = {"{\"k\":1}", "[1,2,3]", "\"abc\""};
+        ASSERT_EQ(col.size(), kNumRows);
+        for (size_t i = 0; i < kNumRows; ++i) {
+            JsonBinaryValue expected;
+            THROW_IF_ERROR(expected.from_json_string(payloads[i], strlen(payloads[i])));
+            const auto ref = col.get_data_at(i);
+            // Sizes must match before memcmp so the comparison never reads past `ref`.
+            ASSERT_EQ(ref.size, expected.size()) << "row " << i;
+            EXPECT_EQ(memcmp(ref.data, expected.value(), expected.size()), 0) << "row " << i;
+        }
+    }
+
+    // --- decimals: compare raw integer storage. DECIMALV2's CppType is the
+    // DecimalV2Value class with a value() accessor; the other decimals' CppType
+    // is the Decimal<T> struct with a .value member. ---
+    // 1, 2, 3 at scale 9 — shared by DECIMALV2(27, 9) and DECIMAL128I(27, 9).
+    const std::vector<__int128_t> one_two_three_e9 = {static_cast<__int128_t>(1) * 1000000000,
+                                                      static_cast<__int128_t>(2) * 1000000000,
+                                                      static_cast<__int128_t>(3) * 1000000000};
+    {
+        SCOPED_TRACE("c_decimalv2");
+        const auto& col = golden_column<ColumnDecimal128V2>(dst, "c_decimalv2");
+        ASSERT_EQ(col.size(), one_two_three_e9.size());
+        for (size_t i = 0; i < one_two_three_e9.size(); ++i) {
+            EXPECT_EQ(col.get_data()[i].value(), one_two_three_e9[i]) << "row " << i;
+        }
+    }
+    expect_golden_decimal_cells<ColumnDecimal32>(dst, "c_decimal32",
+                                                 std::vector<int32_t> {100, 200, 300});
+    expect_golden_decimal_cells<ColumnDecimal64>(dst, "c_decimal64",
+                                                 std::vector<int64_t> {10000, 20000, 30000});
+    expect_golden_decimal_cells<ColumnDecimal128V3>(dst, "c_decimal128i", one_two_three_e9);
+    expect_golden_decimal_cells<ColumnDecimal256>(
+            dst, "c_decimal256",
+            std::vector<wide::Int256> {wide::Int256(1) * wide::Int256(10000000000LL),
+                                       wide::Int256(2) * wide::Int256(10000000000LL),
+                                       wide::Int256(3) * wide::Int256(10000000000LL)});
+
+    // --- object columns: walk the underlying object per cell. ---
+    {
+        // BITMAP rows: {0}, {0,1}, {0,1,2}
+        SCOPED_TRACE("c_bitmap");
+        const auto& col = golden_column<ColumnBitmap>(dst, "c_bitmap");
+        const std::vector<std::vector<uint64_t>> expected = {{0}, {0, 1}, {0, 1, 2}};
+        ASSERT_EQ(col.size(), expected.size());
+        for (size_t i = 0; i < expected.size(); ++i) {
+            const auto& bv = col.get_data()[i];
+            // Equal cardinality plus membership of every expected value pins the exact set.
+            EXPECT_EQ(bv.cardinality(), expected[i].size()) << "row " << i;
+            for (uint64_t v : expected[i]) {
+                EXPECT_TRUE(bv.contains(v)) << "row " << i << " missing " << v;
+            }
+        }
+    }
+    {
+        // HLL: each row was updated with a single value -> cardinality estimate is 1.
+        SCOPED_TRACE("c_hll");
+        const auto& col = golden_column<ColumnHLL>(dst, "c_hll");
+        ASSERT_EQ(col.size(), kNumRows);
+        for (size_t i = 0; i < kNumRows; ++i) {
+            EXPECT_EQ(col.get_data()[i].estimate_cardinality(), 1) << "row " << i;
+        }
+    }
+    {
+        // QuantileState: row i was add_value(i+1). Median over a single value == that value.
+        SCOPED_TRACE("c_quantilestate");
+        const auto& col = golden_column<ColumnQuantileState>(dst, "c_quantilestate");
+        ASSERT_EQ(col.size(), kNumRows);
+        for (size_t i = 0; i < kNumRows; ++i) {
+            EXPECT_EQ(col.get_data()[i].get_value_by_percentile(0.5f), static_cast<double>(i + 1))
+                    << "row " << i;
+        }
+    }
+
+    // --- nested columns: walk offsets / sub-columns. ---
+    {
+        // ARRAY<INT>: [[1,2], [], [3,4]]
+        SCOPED_TRACE("c_array_int");
+        const auto& arr = golden_column<ColumnArray>(dst, "c_array_int");
+        const auto& offsets = arr.get_offsets();
+        const auto& nested_nullable = assert_cast<const ColumnNullable&>(arr.get_data());
+        const auto& nested_int =
+                assert_cast<const ColumnInt32&>(nested_nullable.get_nested_column());
+        const std::vector<std::vector<int32_t>> expected = {{1, 2}, {}, {3, 4}};
+        ASSERT_EQ(arr.size(), expected.size());
+        // `cursor` is the index of the current row's first element in the flattened
+        // nested column; it advances by each row's length.
+        size_t cursor = 0;
+        for (size_t i = 0; i < expected.size(); ++i) {
+            const size_t len = offsets[i] - (i == 0 ? 0 : offsets[i - 1]);
+            ASSERT_EQ(len, expected[i].size()) << "row " << i << " array length";
+            for (size_t k = 0; k < expected[i].size(); ++k) {
+                EXPECT_FALSE(nested_nullable.is_null_at(cursor + k))
+                        << "row " << i << " element " << k;
+                EXPECT_EQ(nested_int.get_data()[cursor + k], expected[i][k])
+                        << "row " << i << " element " << k;
+            }
+            cursor += len;
+        }
+    }
+    {
+        // MAP<STRING, STRING>: {"k0":"v0"}, {"k1":"v1"}, {"k2":"v2"}
+        SCOPED_TRACE("c_map");
+        const auto& m = golden_column<ColumnMap>(dst, "c_map");
+        const auto& offsets = m.get_offsets();
+        const auto& keys_nullable = assert_cast<const ColumnNullable&>(m.get_keys());
+        const auto& keys = assert_cast<const ColumnString&>(keys_nullable.get_nested_column());
+        const auto& vals_nullable = assert_cast<const ColumnNullable&>(m.get_values());
+        const auto& vals = assert_cast<const ColumnString&>(vals_nullable.get_nested_column());
+        ASSERT_EQ(m.size(), kNumRows);
+        for (size_t i = 0; i < kNumRows; ++i) {
+            const size_t start = (i == 0) ? 0 : offsets[i - 1];
+            const size_t len = offsets[i] - start;
+            ASSERT_EQ(len, 1) << "row " << i << " expected single-entry map";
+            EXPECT_FALSE(keys_nullable.is_null_at(start));
+            EXPECT_FALSE(vals_nullable.is_null_at(start));
+            const auto kref = keys.get_data_at(start);
+            const auto vref = vals.get_data_at(start);
+            EXPECT_EQ(std::string(kref.data, kref.size), "k" + std::to_string(i)) << "row " << i;
+            EXPECT_EQ(std::string(vref.data, vref.size), "v" + std::to_string(i)) << "row " << i;
+        }
+    }
+    {
+        // STRUCT<int, string>: (0,"name0"), (1,"name1"), (2,"name2")
+        SCOPED_TRACE("c_struct");
+        const auto& st = golden_column<ColumnStruct>(dst, "c_struct");
+        ASSERT_EQ(st.tuple_size(), 2);
+        const auto& f0_nullable = assert_cast<const ColumnNullable&>(st.get_column(0));
+        const auto& f0 = assert_cast<const ColumnInt32&>(f0_nullable.get_nested_column());
+        const auto& f1_nullable = assert_cast<const ColumnNullable&>(st.get_column(1));
+        const auto& f1 = assert_cast<const ColumnString&>(f1_nullable.get_nested_column());
+        ASSERT_EQ(st.size(), kNumRows);
+        for (size_t i = 0; i < kNumRows; ++i) {
+            EXPECT_FALSE(f0_nullable.is_null_at(i));
+            EXPECT_FALSE(f1_nullable.is_null_at(i));
+            EXPECT_EQ(f0.get_data()[i], static_cast<int32_t>(i)) << "row " << i;
+            const auto sref = f1.get_data_at(i);
+            EXPECT_EQ(std::string(sref.data, sref.size), "name" + std::to_string(i))
+                    << "row " << i;
+        }
+    }
+    {
+        // c_variant: round-trip semantics are intentionally lossy at the structure
+        // level — DataTypeVariantSerDe::read_one_cell_from_jsonb re-inserts each cell
+        // as a single path-less Variant whose root holds the encoded JSON blob/string.
+        // Validate that (a) the row count matches and (b) each row serializes back to
+        // a non-empty JSON string equivalent to what we originally fed in, regardless
+        // of internal subcolumn layout.
+        SCOPED_TRACE("c_variant");
+        const auto& col = golden_column<ColumnVariant>(dst, "c_variant");
+        DataTypeSerDe::FormatOptions opts;
+        const std::vector<std::string> expected = {"{\"a\":1}", "{\"b\":\"hello\"}", "[1,2,3]"};
+        ASSERT_EQ(col.size(), expected.size());
+        for (size_t i = 0; i < expected.size(); ++i) {
+            std::string out;
+            col.serialize_one_row_to_string(static_cast<int64_t>(i), &out, opts);
+            EXPECT_FALSE(out.empty()) << "row " << i;
+            EXPECT_EQ(out, expected[i]) << "row " << i;
+        }
+    }
+
+    // --- Nullable<> wrappers. ---
+    {
+        // c_nullable_int: null, 42, null  (insert_default for nullable produces a null)
+        SCOPED_TRACE("c_nullable_int");
+        const auto& col = golden_column<ColumnNullable>(dst, "c_nullable_int");
+        const auto& nested = assert_cast<const ColumnInt32&>(col.get_nested_column());
+        ASSERT_EQ(col.size(), kNumRows);
+        EXPECT_TRUE(col.is_null_at(0));
+        EXPECT_FALSE(col.is_null_at(1));
+        EXPECT_EQ(nested.get_data()[1], 42);
+        EXPECT_TRUE(col.is_null_at(2));
+    }
+    {
+        // c_nullable_string: "a", null, "c"
+        SCOPED_TRACE("c_nullable_string");
+        const auto& col = golden_column<ColumnNullable>(dst, "c_nullable_string");
+        const auto& nested = assert_cast<const ColumnString&>(col.get_nested_column());
+        ASSERT_EQ(col.size(), kNumRows);
+        EXPECT_FALSE(col.is_null_at(0));
+        const auto r0 = nested.get_data_at(0);
+        EXPECT_EQ(std::string(r0.data, r0.size), "a");
+        EXPECT_TRUE(col.is_null_at(1));
+        EXPECT_FALSE(col.is_null_at(2));
+        const auto r2 = nested.get_data_at(2);
+        EXPECT_EQ(std::string(r2.data, r2.size), "c");
+    }
+    {
+        // c_nullable_bitmap: null, {7,9}, null
+        SCOPED_TRACE("c_nullable_bitmap");
+        const auto& col = golden_column<ColumnNullable>(dst, "c_nullable_bitmap");
+        const auto& nested = assert_cast<const ColumnBitmap&>(col.get_nested_column());
+        ASSERT_EQ(col.size(), kNumRows);
+        EXPECT_TRUE(col.is_null_at(0));
+        EXPECT_FALSE(col.is_null_at(1));
+        EXPECT_EQ(nested.get_data()[1].cardinality(), 2);
+        EXPECT_TRUE(nested.get_data()[1].contains(uint64_t {7}));
+        EXPECT_TRUE(nested.get_data()[1].contains(uint64_t {9}));
+        EXPECT_TRUE(col.is_null_at(2));
+    }
+}
+
+// Generator (NOT part of the normal verify suite — SKIPped unless
+// DORIS_REGEN_JSONB_GOLDEN is set to exactly "1"). When intentionally changing the
+// encoder, run this test once to refresh the golden file, then `git add` it.
+//
+// Requires DORIS_HOME to point at the source tree: the file is written to
+// $DORIS_HOME/be/test/util/test_data/block_to_jsonb_all_types_golden.bin.
+//
+// This is the only test that exercises block_to_jsonb on the all-types fixture.
+TEST(BlockSerializeTest, GenerateAllRowStoreSupportedTypesGolden) {
+    // Golden file location relative to DORIS_HOME; used for both the skip hint and the
+    // output path so the two cannot drift apart.
+    const std::string golden_rel = "be/test/util/test_data/block_to_jsonb_all_types_golden.bin";
+
+    // Opt-in gate. Only the documented value "1" enables regeneration, so an unset,
+    // empty or "0" variable can never overwrite the committed golden file by accident.
+    const char* regen_flag = std::getenv("DORIS_REGEN_JSONB_GOLDEN");
+    if (regen_flag == nullptr || std::string(regen_flag) != "1") {
+        GTEST_SKIP() << "set DORIS_REGEN_JSONB_GOLDEN=1 (and DORIS_HOME) to regenerate "
+                     << golden_rel;
+    }
+    const char* doris_home = std::getenv("DORIS_HOME");
+    ASSERT_NE(doris_home, nullptr) << "DORIS_HOME must be set so the golden file can be "
+                                      "written into the source tree.";
+
+    // Build the fully populated fixture block together with its schema.
+    Block block;
+    TabletSchema schema;
+    build_all_row_store_types_block(block, schema);
+
+    // Encode every column of every row into one JSONB value per row.
+    MutableColumnPtr jsonb_col = ColumnString::create();
+    JsonbSerializeUtil::block_to_jsonb(schema, block, assert_cast<ColumnString&>(*jsonb_col),
+                                       block.columns(),
+                                       create_data_type_serdes(block.get_data_types()), {});
+    // Refuse to overwrite the golden file with a truncated encoding.
+    ASSERT_EQ(jsonb_col->size(), block.rows()) << "block_to_jsonb must emit one value per row";
+
+    const std::string src_path = std::string(doris_home) + "/" + golden_rel;
+    dump_jsonb_column_to_file(assert_cast<const ColumnString&>(*jsonb_col), src_path);
+    std::cout << "[golden] wrote " << jsonb_col->size() << " row(s) to " << src_path
+              << "\n[golden] commit this file, then rerun the verifier "
+              << "(BlockSerializeTest.AllRowStoreSupportedTypes) to confirm." << std::endl;
+}
+
 } // namespace doris
diff --git a/be/test/util/test_data/block_to_jsonb_all_types_golden.bin 
b/be/test/util/test_data/block_to_jsonb_all_types_golden.bin
new file mode 100644
index 00000000000..04ef584699d
Binary files /dev/null and 
b/be/test/util/test_data/block_to_jsonb_all_types_golden.bin differ


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to