github-actions[bot] commented on code in PR #63182:
URL: https://github.com/apache/doris/pull/63182#discussion_r3227055861


##########
fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java:
##########
@@ -3213,8 +3212,11 @@ public void validateForFlexiblePartialUpdate() throws 
UserException {
             throw new UserException("Flexible partial update can only support 
table with light_schema_change enabled."
                     + " But table " + getName() + "'s property 
light_schema_change is false");
         }
-        if (hasVariantColumns()) {
-            throw new UserException("Flexible partial update can only support 
table without variant columns.");
+        for (Column column : getBaseSchema()) {
+            if (column.getType().isVariantType() && 
column.getVariantEnableDocMode()) {

Review Comment:
   This new doc-mode restriction is only checked by load/routine-load 
validation after the skip-bitmap column already exists. The `ALTER TABLE ... 
ENABLE FEATURE "UPDATE_FLEXIBLE_COLUMNS"` path in `AlterTableCommand` does not 
call this helper because `hasSkipBitmapColumn()` is false before it adds the 
hidden column, so a MoW table with a doc-mode VARIANT column can still enable 
flexible partial update successfully and then every flexible load is rejected 
later by this check/BE. Please enforce the doc-mode check in the ALTER-enable 
path as well, or split the table-shape validation so unsupported tables are 
rejected before adding the feature.



##########
be/src/exec/common/variant_util.cpp:
##########
@@ -2141,6 +2147,343 @@ phmap::flat_hash_map<std::string_view, 
ColumnVariant::Subcolumn> materialize_doc
     return subcolumns;
 }
 
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_MASK = 1ULL << 63;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_PAYLOAD_MASK = 
~VARIANT_PATCH_PATH_MARKER_MASK;
+constexpr uint64_t VARIANT_PATCH_PATH_HASH_SEED_0 = 0x4d4f575641524941ULL;
+constexpr uint64_t VARIANT_PATCH_PATH_HASH_SEED_1 = 0x4d4f575041544831ULL;
+
+// The hidden skip bitmap stores top-level column unique ids, so VARIANT patch 
metadata uses
+// values outside the int32 uid range. Each path is represented by two 
column-scoped 63-bit
+// hash markers with the high marker bit set.
+bool is_variant_patch_path_marker(uint64_t value) {
+    return (value & VARIANT_PATCH_PATH_MARKER_MASK) != 0;
+}
+
+namespace {
+
+using VariantPatchMarkerPair = std::array<uint64_t, 2>;
+
+uint64_t normalized_variant_col_unique_id(int32_t variant_col_unique_id) {
+    CHECK_GE(variant_col_unique_id, 0);
+    return static_cast<uint64_t>(variant_col_unique_id);
+}
+
+uint64_t variant_patch_path_hash_payload(int32_t variant_col_unique_id, const 
PathInData& path,
+                                         uint64_t hash_seed) {
+    const auto path_text = path.get_path();
+    const uint64_t normalized_uid = 
normalized_variant_col_unique_id(variant_col_unique_id);
+    const uint64_t column_scoped_seed =
+            hash_seed ^ (normalized_uid << 32) ^ (normalized_uid >> 32) ^ 
normalized_uid;
+    return HashUtil::xxHash64WithSeed(path_text.data(), path_text.size(), 
column_scoped_seed) &
+           VARIANT_PATCH_PATH_MARKER_PAYLOAD_MASK;
+}
+
+uint64_t variant_patch_marker(uint64_t payload) {
+    return VARIANT_PATCH_PATH_MARKER_MASK | payload;
+}
+
+VariantPatchMarkerPair variant_patch_path_markers(int32_t 
variant_col_unique_id,
+                                                  const PathInData& path) {
+    uint64_t first_payload = 
variant_patch_path_hash_payload(variant_col_unique_id, path,
+                                                             
VARIANT_PATCH_PATH_HASH_SEED_0);
+    uint64_t second_payload = 
variant_patch_path_hash_payload(variant_col_unique_id, path,
+                                                              
VARIANT_PATCH_PATH_HASH_SEED_1);
+    if (first_payload == second_payload) {
+        second_payload = (second_payload + 1) & 
VARIANT_PATCH_PATH_MARKER_PAYLOAD_MASK;
+    }
+    return {variant_patch_marker(first_payload), 
variant_patch_marker(second_payload)};
+}
+
+void add_variant_patch_markers(const VariantPatchMarkerPair& markers, 
BitmapValue* bitmap) {
+    bitmap->add(markers[0]);
+    bitmap->add(markers[1]);
+}
+
+bool contains_variant_patch_markers(const BitmapValue& bitmap,
+                                    const VariantPatchMarkerPair& markers) {
+    return bitmap.contains(markers[0]) && bitmap.contains(markers[1]);
+}
+
+Status variant_object_patch_required_status() {
+    return Status::NotSupported(
+            "VARIANT flexible partial update only supports JSON object patch 
values");
+}
+
+Status variant_doc_mode_not_supported_status() {
+    return Status::NotSupported(
+            "VARIANT flexible partial update does not support doc mode in this 
version");
+}
+
+const ColumnVariant& get_variant_nested_column(const IColumn& column) {
+    if (column.is_nullable()) {
+        return assert_cast<const ColumnVariant&>(
+                assert_cast<const 
ColumnNullable&>(column).get_nested_column());
+    }
+    return assert_cast<const ColumnVariant&>(column);
+}
+
+ColumnVariant& get_variant_nested_column(IColumn& column) {
+    if (column.is_nullable()) {
+        return assert_cast<ColumnVariant&>(
+                assert_cast<ColumnNullable&>(column).get_nested_column());
+    }
+    return assert_cast<ColumnVariant&>(column);
+}
+
+bool is_path_prefix_of(const PathInData& prefix, const PathInData& path) {
+    const auto& prefix_parts = prefix.get_parts();
+    const auto& path_parts = path.get_parts();
+    if (prefix_parts.size() > path_parts.size()) {
+        return false;
+    }
+    return std::equal(prefix_parts.begin(), prefix_parts.end(), 
path_parts.begin());
+}
+
+bool paths_conflict(const PathInData& left, const PathInData& right) {
+    return is_path_prefix_of(left, right) || is_path_prefix_of(right, left);
+}
+
+bool starts_with_json_object(std::string_view text) {
+    auto it = std::ranges::find_if_not(text, [](unsigned char ch) { return 
std::isspace(ch); });
+    return it != text.end() && *it == '{';
+}
+
+bool root_jsonb_field_to_json_text(const Field& field, std::string* json_text) 
{
+    switch (field.get_type()) {
+    case PrimitiveType::TYPE_JSONB: {
+        const auto& jsonb = field.get<PrimitiveType::TYPE_JSONB>();
+        *json_text = JsonbToJson::jsonb_to_json_string(jsonb.get_value(), 
jsonb.get_size());
+        return true;
+    }
+    default:
+        return false;
+    }
+}
+
+bool collect_json_object_text_map(std::string_view json_text, bool 
reject_json_null_value,
+                                  VariantMap* object) {
+    if (!starts_with_json_object(json_text)) {
+        return false;
+    }
+
+    auto parsed = ColumnVariant::create(0, false);
+    ParseConfig config;
+    config.parse_to = ParseConfig::ParseTo::OnlySubcolumns;
+    config.reject_json_null_value = reject_json_null_value;
+    StringRef json_ref {json_text.data(), json_text.size()};
+    parse_json_to_variant(*parsed, json_ref, nullptr, config);
+    parsed->finalize();
+
+    Field parsed_field;
+    parsed->get(0, parsed_field);
+    if (parsed_field.get_type() != PrimitiveType::TYPE_VARIANT) {
+        return false;
+    }
+    const auto& parsed_object = 
parsed_field.get<PrimitiveType::TYPE_VARIANT>();
+    if (parsed_object.contains(PathInData())) {
+        return false;
+    }
+    for (const auto& [path, value] : parsed_object) {
+        if (!path.empty()) {
+            object->insert_or_assign(path, value);
+        }
+    }
+    return true;
+}
+
+void collect_materialized_variant_map(const ColumnVariant& variant, size_t 
row, VariantMap* object,
+                                      FieldWithDataType* root_field) {
+    Field field;
+    variant.get(row, field);
+    if (field.get_type() == PrimitiveType::TYPE_VARIANT) {
+        for (const auto& [path, value] : 
field.get<PrimitiveType::TYPE_VARIANT>()) {
+            if (path.get_path() == DOC_VALUE_COLUMN_PATH) {
+                continue;
+            }
+            if (path.empty()) {
+                *root_field = value;
+                continue;
+            }
+            object->insert_or_assign(path, value);
+        }
+    }
+
+    DCHECK(!variant.has_doc_value_column(row));
+}
+
+Status collect_variant_patch_map(const ColumnVariant& variant, size_t row, 
bool* is_object_patch,
+                                 VariantMap* object) {
+    object->clear();
+    FieldWithDataType root_field;
+    collect_materialized_variant_map(variant, row, object, &root_field);
+    if (root_field.field.get_type() == PrimitiveType::TYPE_NULL) {
+        *is_object_patch = true;
+        return Status::OK();
+    }
+
+    std::string json_text;
+    if (!root_jsonb_field_to_json_text(root_field.field, &json_text)) {
+        *is_object_patch = false;
+        return Status::OK();
+    }
+    object->clear();
+    *is_object_patch = collect_json_object_text_map(json_text, true, object);
+    return Status::OK();
+}
+
+void collect_variant_base_map(const ColumnVariant& variant, size_t row, 
VariantMap* object) {
+    object->clear();
+    FieldWithDataType root_field;
+    collect_materialized_variant_map(variant, row, object, &root_field);
+    std::string json_text;
+    if (root_jsonb_field_to_json_text(root_field.field, &json_text)) {
+        collect_json_object_text_map(json_text, false, object);
+    }
+}
+
+Status insert_variant_field(IColumn& dst_column, const Field& field) {
+    DCHECK(!get_variant_nested_column(dst_column).enable_doc_mode());
+    dst_column.insert(field);
+    return Status::OK();
+}
+
+Status check_variant_object_patch_supported(const IColumn& column) {
+    if (get_variant_nested_column(column).enable_doc_mode()) {
+        return variant_doc_mode_not_supported_status();
+    }
+    return Status::OK();
+}
+
+Status merge_variant_object_patch(const IColumn& old_column, size_t old_row,
+                                  VariantMap&& patch_object, IColumn& 
dst_column) {
+    VariantMap merged_object;
+    if (!old_column.is_null_at(old_row)) {
+        collect_variant_base_map(get_variant_nested_column(old_column), 
old_row, &merged_object);
+    }
+    for (const auto& [patch_path, _] : patch_object) {
+        for (auto it = merged_object.begin(); it != merged_object.end();) {
+            if (paths_conflict(patch_path, it->first)) {
+                it = merged_object.erase(it);
+            } else {
+                ++it;
+            }
+        }
+    }
+    for (auto& [patch_path, patch_value] : patch_object) {
+        merged_object.insert_or_assign(patch_path, std::move(patch_value));
+    }
+
+    Field merged_field = 
Field::create_field<TYPE_VARIANT>(std::move(merged_object));
+    return insert_variant_field(dst_column, merged_field);
+}
+
+Status insert_variant_object_patch(VariantMap&& patch_object, IColumn& 
dst_column) {
+    Field patch_field = 
Field::create_field<TYPE_VARIANT>(std::move(patch_object));
+    return insert_variant_field(dst_column, patch_field);
+}
+
+} // namespace
+
+Status mark_variant_patch_paths(const IColumn& patch_column, size_t patch_row,
+                                int32_t variant_col_unique_id, BitmapValue* 
patch_path_markers) {
+    RETURN_IF_CATCH_EXCEPTION({
+        if (patch_column.is_null_at(patch_row)) {
+            return variant_object_patch_required_status();
+        }
+        RETURN_IF_ERROR(check_variant_object_patch_supported(patch_column));
+
+        bool is_object_patch = false;
+        VariantMap patch_object;
+        
RETURN_IF_ERROR(collect_variant_patch_map(get_variant_nested_column(patch_column),
+                                                  patch_row, &is_object_patch, 
&patch_object));
+        if (!is_object_patch) {
+            return variant_object_patch_required_status();
+        }
+
+        std::set<VariantPatchMarkerPair> seen_path_markers;
+        for (const auto& [path, _] : patch_object) {
+            auto markers = variant_patch_path_markers(variant_col_unique_id, 
path);

Review Comment:
   This only marks materialized leaf paths, so a patch like `{"a":{}}` is 
accepted as an object patch but records no marker for `a`. In same-batch merge 
`merge_variant_patch()` also sees an empty `patch_object` and leaves the old 
value unchanged; in publish-conflict merge 
`merge_variant_patch_by_path_markers()` cannot select `a` either. For an 
existing row such as `v={"a":{"x":1},"b":2}`, loading `{"k":1,"v":{"a":{}}}` 
should at least remove/replace the conflicting `a.x` path, but with the current 
marker set it becomes indistinguishable from a root `{}` no-op and stale data 
remains. Please either preserve/mark empty-object paths or reject such patches, 
and add coverage for nested empty object patches in same-batch and 
publish-conflict paths.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to