github-actions[bot] commented on code in PR #63182:
URL: https://github.com/apache/doris/pull/63182#discussion_r3227055861
##########
fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java:
##########
@@ -3213,8 +3212,11 @@ public void validateForFlexiblePartialUpdate() throws
UserException {
throw new UserException("Flexible partial update can only support
table with light_schema_change enabled."
+ " But table " + getName() + "'s property
light_schema_change is false");
}
- if (hasVariantColumns()) {
- throw new UserException("Flexible partial update can only support
table without variant columns.");
+ for (Column column : getBaseSchema()) {
+ if (column.getType().isVariantType() &&
column.getVariantEnableDocMode()) {
Review Comment:
This new doc-mode restriction is only checked by load/routine-load
validation after the skip-bitmap column already exists. The `ALTER TABLE ...
ENABLE FEATURE "UPDATE_FLEXIBLE_COLUMNS"` path in `AlterTableCommand` does not
call this helper because `hasSkipBitmapColumn()` is false before it adds the
hidden column, so a MoW table with a doc-mode VARIANT column can still enable
flexible partial update successfully and then every flexible load is rejected
later by this check/BE. Please enforce the doc-mode check in the ALTER-enable
path as well, or split the table-shape validation so unsupported tables are
rejected before adding the feature.
##########
be/src/exec/common/variant_util.cpp:
##########
@@ -2141,6 +2147,343 @@ phmap::flat_hash_map<std::string_view,
ColumnVariant::Subcolumn> materialize_doc
return subcolumns;
}
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_MASK = 1ULL << 63;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_PAYLOAD_MASK =
~VARIANT_PATCH_PATH_MARKER_MASK;
+constexpr uint64_t VARIANT_PATCH_PATH_HASH_SEED_0 = 0x4d4f575641524941ULL;
+constexpr uint64_t VARIANT_PATCH_PATH_HASH_SEED_1 = 0x4d4f575041544831ULL;
+
+// The hidden skip bitmap stores top-level column unique ids, so VARIANT patch
metadata uses
+// values outside the int32 uid range. Each path is represented by two
column-scoped 63-bit
+// hash markers with the high marker bit set.
+bool is_variant_patch_path_marker(uint64_t value) {
+ return (value & VARIANT_PATCH_PATH_MARKER_MASK) != 0;
+}
+
+namespace {
+
+using VariantPatchMarkerPair = std::array<uint64_t, 2>;
+
+uint64_t normalized_variant_col_unique_id(int32_t variant_col_unique_id) {
+ CHECK_GE(variant_col_unique_id, 0);
+ return static_cast<uint64_t>(variant_col_unique_id);
+}
+
+uint64_t variant_patch_path_hash_payload(int32_t variant_col_unique_id, const
PathInData& path,
+ uint64_t hash_seed) {
+ const auto path_text = path.get_path();
+ const uint64_t normalized_uid =
normalized_variant_col_unique_id(variant_col_unique_id);
+ const uint64_t column_scoped_seed =
+ hash_seed ^ (normalized_uid << 32) ^ (normalized_uid >> 32) ^
normalized_uid;
+ return HashUtil::xxHash64WithSeed(path_text.data(), path_text.size(),
column_scoped_seed) &
+ VARIANT_PATCH_PATH_MARKER_PAYLOAD_MASK;
+}
+
+uint64_t variant_patch_marker(uint64_t payload) {
+ return VARIANT_PATCH_PATH_MARKER_MASK | payload;
+}
+
+VariantPatchMarkerPair variant_patch_path_markers(int32_t
variant_col_unique_id,
+ const PathInData& path) {
+ uint64_t first_payload =
variant_patch_path_hash_payload(variant_col_unique_id, path,
+
VARIANT_PATCH_PATH_HASH_SEED_0);
+ uint64_t second_payload =
variant_patch_path_hash_payload(variant_col_unique_id, path,
+
VARIANT_PATCH_PATH_HASH_SEED_1);
+ if (first_payload == second_payload) {
+ second_payload = (second_payload + 1) &
VARIANT_PATCH_PATH_MARKER_PAYLOAD_MASK;
+ }
+ return {variant_patch_marker(first_payload),
variant_patch_marker(second_payload)};
+}
+
+void add_variant_patch_markers(const VariantPatchMarkerPair& markers,
BitmapValue* bitmap) {
+ bitmap->add(markers[0]);
+ bitmap->add(markers[1]);
+}
+
+bool contains_variant_patch_markers(const BitmapValue& bitmap,
+ const VariantPatchMarkerPair& markers) {
+ return bitmap.contains(markers[0]) && bitmap.contains(markers[1]);
+}
+
+Status variant_object_patch_required_status() {
+ return Status::NotSupported(
+ "VARIANT flexible partial update only supports JSON object patch
values");
+}
+
+Status variant_doc_mode_not_supported_status() {
+ return Status::NotSupported(
+ "VARIANT flexible partial update does not support doc mode in this
version");
+}
+
+const ColumnVariant& get_variant_nested_column(const IColumn& column) {
+ if (column.is_nullable()) {
+ return assert_cast<const ColumnVariant&>(
+ assert_cast<const
ColumnNullable&>(column).get_nested_column());
+ }
+ return assert_cast<const ColumnVariant&>(column);
+}
+
+ColumnVariant& get_variant_nested_column(IColumn& column) {
+ if (column.is_nullable()) {
+ return assert_cast<ColumnVariant&>(
+ assert_cast<ColumnNullable&>(column).get_nested_column());
+ }
+ return assert_cast<ColumnVariant&>(column);
+}
+
+bool is_path_prefix_of(const PathInData& prefix, const PathInData& path) {
+ const auto& prefix_parts = prefix.get_parts();
+ const auto& path_parts = path.get_parts();
+ if (prefix_parts.size() > path_parts.size()) {
+ return false;
+ }
+ return std::equal(prefix_parts.begin(), prefix_parts.end(),
path_parts.begin());
+}
+
+bool paths_conflict(const PathInData& left, const PathInData& right) {
+ return is_path_prefix_of(left, right) || is_path_prefix_of(right, left);
+}
+
+bool starts_with_json_object(std::string_view text) {
+ auto it = std::ranges::find_if_not(text, [](unsigned char ch) { return
std::isspace(ch); });
+ return it != text.end() && *it == '{';
+}
+
+bool root_jsonb_field_to_json_text(const Field& field, std::string* json_text)
{
+ switch (field.get_type()) {
+ case PrimitiveType::TYPE_JSONB: {
+ const auto& jsonb = field.get<PrimitiveType::TYPE_JSONB>();
+ *json_text = JsonbToJson::jsonb_to_json_string(jsonb.get_value(),
jsonb.get_size());
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+bool collect_json_object_text_map(std::string_view json_text, bool
reject_json_null_value,
+ VariantMap* object) {
+ if (!starts_with_json_object(json_text)) {
+ return false;
+ }
+
+ auto parsed = ColumnVariant::create(0, false);
+ ParseConfig config;
+ config.parse_to = ParseConfig::ParseTo::OnlySubcolumns;
+ config.reject_json_null_value = reject_json_null_value;
+ StringRef json_ref {json_text.data(), json_text.size()};
+ parse_json_to_variant(*parsed, json_ref, nullptr, config);
+ parsed->finalize();
+
+ Field parsed_field;
+ parsed->get(0, parsed_field);
+ if (parsed_field.get_type() != PrimitiveType::TYPE_VARIANT) {
+ return false;
+ }
+ const auto& parsed_object =
parsed_field.get<PrimitiveType::TYPE_VARIANT>();
+ if (parsed_object.contains(PathInData())) {
+ return false;
+ }
+ for (const auto& [path, value] : parsed_object) {
+ if (!path.empty()) {
+ object->insert_or_assign(path, value);
+ }
+ }
+ return true;
+}
+
+void collect_materialized_variant_map(const ColumnVariant& variant, size_t
row, VariantMap* object,
+ FieldWithDataType* root_field) {
+ Field field;
+ variant.get(row, field);
+ if (field.get_type() == PrimitiveType::TYPE_VARIANT) {
+ for (const auto& [path, value] :
field.get<PrimitiveType::TYPE_VARIANT>()) {
+ if (path.get_path() == DOC_VALUE_COLUMN_PATH) {
+ continue;
+ }
+ if (path.empty()) {
+ *root_field = value;
+ continue;
+ }
+ object->insert_or_assign(path, value);
+ }
+ }
+
+ DCHECK(!variant.has_doc_value_column(row));
+}
+
+Status collect_variant_patch_map(const ColumnVariant& variant, size_t row,
bool* is_object_patch,
+ VariantMap* object) {
+ object->clear();
+ FieldWithDataType root_field;
+ collect_materialized_variant_map(variant, row, object, &root_field);
+ if (root_field.field.get_type() == PrimitiveType::TYPE_NULL) {
+ *is_object_patch = true;
+ return Status::OK();
+ }
+
+ std::string json_text;
+ if (!root_jsonb_field_to_json_text(root_field.field, &json_text)) {
+ *is_object_patch = false;
+ return Status::OK();
+ }
+ object->clear();
+ *is_object_patch = collect_json_object_text_map(json_text, true, object);
+ return Status::OK();
+}
+
+void collect_variant_base_map(const ColumnVariant& variant, size_t row,
VariantMap* object) {
+ object->clear();
+ FieldWithDataType root_field;
+ collect_materialized_variant_map(variant, row, object, &root_field);
+ std::string json_text;
+ if (root_jsonb_field_to_json_text(root_field.field, &json_text)) {
+ collect_json_object_text_map(json_text, false, object);
+ }
+}
+
+Status insert_variant_field(IColumn& dst_column, const Field& field) {
+ DCHECK(!get_variant_nested_column(dst_column).enable_doc_mode());
+ dst_column.insert(field);
+ return Status::OK();
+}
+
+Status check_variant_object_patch_supported(const IColumn& column) {
+ if (get_variant_nested_column(column).enable_doc_mode()) {
+ return variant_doc_mode_not_supported_status();
+ }
+ return Status::OK();
+}
+
+Status merge_variant_object_patch(const IColumn& old_column, size_t old_row,
+ VariantMap&& patch_object, IColumn&
dst_column) {
+ VariantMap merged_object;
+ if (!old_column.is_null_at(old_row)) {
+ collect_variant_base_map(get_variant_nested_column(old_column),
old_row, &merged_object);
+ }
+ for (const auto& [patch_path, _] : patch_object) {
+ for (auto it = merged_object.begin(); it != merged_object.end();) {
+ if (paths_conflict(patch_path, it->first)) {
+ it = merged_object.erase(it);
+ } else {
+ ++it;
+ }
+ }
+ }
+ for (auto& [patch_path, patch_value] : patch_object) {
+ merged_object.insert_or_assign(patch_path, std::move(patch_value));
+ }
+
+ Field merged_field =
Field::create_field<TYPE_VARIANT>(std::move(merged_object));
+ return insert_variant_field(dst_column, merged_field);
+}
+
+Status insert_variant_object_patch(VariantMap&& patch_object, IColumn&
dst_column) {
+ Field patch_field =
Field::create_field<TYPE_VARIANT>(std::move(patch_object));
+ return insert_variant_field(dst_column, patch_field);
+}
+
+} // namespace
+
+Status mark_variant_patch_paths(const IColumn& patch_column, size_t patch_row,
+ int32_t variant_col_unique_id, BitmapValue*
patch_path_markers) {
+ RETURN_IF_CATCH_EXCEPTION({
+ if (patch_column.is_null_at(patch_row)) {
+ return variant_object_patch_required_status();
+ }
+ RETURN_IF_ERROR(check_variant_object_patch_supported(patch_column));
+
+ bool is_object_patch = false;
+ VariantMap patch_object;
+
RETURN_IF_ERROR(collect_variant_patch_map(get_variant_nested_column(patch_column),
+ patch_row, &is_object_patch,
&patch_object));
+ if (!is_object_patch) {
+ return variant_object_patch_required_status();
+ }
+
+ std::set<VariantPatchMarkerPair> seen_path_markers;
+ for (const auto& [path, _] : patch_object) {
+ auto markers = variant_patch_path_markers(variant_col_unique_id,
path);
Review Comment:
This only marks materialized leaf paths, so a patch like `{"a":{}}` is
accepted as an object patch but records no marker for `a`. In same-batch merge
`merge_variant_patch()` also sees an empty `patch_object` and leaves the old
value unchanged; in publish-conflict merge
`merge_variant_patch_by_path_markers()` cannot select `a` either. For an
existing row such as `v={"a":{"x":1},"b":2}`, loading `{"k":1,"v":{"a":{}}}`
should at least remove/replace the conflicting `a.x` path, but with the current
marker set it becomes indistinguishable from a root `{}` no-op and stale data
remains. Please either preserve/mark empty-object paths or reject such patches,
and add coverage for nested empty object patches in same-batch and
publish-conflict paths.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]