eldenmoon commented on code in PR #63182:
URL: https://github.com/apache/doris/pull/63182#discussion_r3228969005
##########
be/src/exec/common/variant_util.cpp:
##########
@@ -2141,6 +2147,343 @@ phmap::flat_hash_map<std::string_view,
ColumnVariant::Subcolumn> materialize_doc
return subcolumns;
}
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_MASK = 1ULL << 63;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_PAYLOAD_MASK =
~VARIANT_PATCH_PATH_MARKER_MASK;
+constexpr uint64_t VARIANT_PATCH_PATH_HASH_SEED_0 = 0x4d4f575641524941ULL;
+constexpr uint64_t VARIANT_PATCH_PATH_HASH_SEED_1 = 0x4d4f575041544831ULL;
+
+// The hidden skip bitmap stores top-level column unique ids, so VARIANT patch
metadata uses
+// values outside the int32 uid range. Each path is represented by two
column-scoped 63-bit
+// hash markers with the high marker bit set.
+bool is_variant_patch_path_marker(uint64_t value) {
+ return (value & VARIANT_PATCH_PATH_MARKER_MASK) != 0;
+}
+
+namespace {
+
+using VariantPatchMarkerPair = std::array<uint64_t, 2>;
+
+uint64_t normalized_variant_col_unique_id(int32_t variant_col_unique_id) {
+ CHECK_GE(variant_col_unique_id, 0);
+ return static_cast<uint64_t>(variant_col_unique_id);
+}
+
+uint64_t variant_patch_path_hash_payload(int32_t variant_col_unique_id, const
PathInData& path,
+ uint64_t hash_seed) {
+ const auto path_text = path.get_path();
+ const uint64_t normalized_uid =
normalized_variant_col_unique_id(variant_col_unique_id);
+ const uint64_t column_scoped_seed =
+ hash_seed ^ (normalized_uid << 32) ^ (normalized_uid >> 32) ^
normalized_uid;
+ return HashUtil::xxHash64WithSeed(path_text.data(), path_text.size(),
column_scoped_seed) &
+ VARIANT_PATCH_PATH_MARKER_PAYLOAD_MASK;
+}
+
+uint64_t variant_patch_marker(uint64_t payload) {
+ return VARIANT_PATCH_PATH_MARKER_MASK | payload;
+}
+
+VariantPatchMarkerPair variant_patch_path_markers(int32_t
variant_col_unique_id,
+ const PathInData& path) {
+ uint64_t first_payload =
variant_patch_path_hash_payload(variant_col_unique_id, path,
+
VARIANT_PATCH_PATH_HASH_SEED_0);
+ uint64_t second_payload =
variant_patch_path_hash_payload(variant_col_unique_id, path,
+
VARIANT_PATCH_PATH_HASH_SEED_1);
+ if (first_payload == second_payload) {
+ second_payload = (second_payload + 1) &
VARIANT_PATCH_PATH_MARKER_PAYLOAD_MASK;
+ }
+ return {variant_patch_marker(first_payload),
variant_patch_marker(second_payload)};
+}
+
+void add_variant_patch_markers(const VariantPatchMarkerPair& markers,
BitmapValue* bitmap) {
+ bitmap->add(markers[0]);
+ bitmap->add(markers[1]);
+}
+
+bool contains_variant_patch_markers(const BitmapValue& bitmap,
+ const VariantPatchMarkerPair& markers) {
+ return bitmap.contains(markers[0]) && bitmap.contains(markers[1]);
+}
+
+Status variant_object_patch_required_status() {
+ return Status::NotSupported(
+ "VARIANT flexible partial update only supports JSON object patch
values");
+}
+
+Status variant_doc_mode_not_supported_status() {
+ return Status::NotSupported(
+ "VARIANT flexible partial update does not support doc mode in this
version");
+}
+
+const ColumnVariant& get_variant_nested_column(const IColumn& column) {
+ if (column.is_nullable()) {
+ return assert_cast<const ColumnVariant&>(
+ assert_cast<const
ColumnNullable&>(column).get_nested_column());
+ }
+ return assert_cast<const ColumnVariant&>(column);
+}
+
+ColumnVariant& get_variant_nested_column(IColumn& column) {
+ if (column.is_nullable()) {
+ return assert_cast<ColumnVariant&>(
+ assert_cast<ColumnNullable&>(column).get_nested_column());
+ }
+ return assert_cast<ColumnVariant&>(column);
+}
+
+bool is_path_prefix_of(const PathInData& prefix, const PathInData& path) {
+ const auto& prefix_parts = prefix.get_parts();
+ const auto& path_parts = path.get_parts();
+ if (prefix_parts.size() > path_parts.size()) {
+ return false;
+ }
+ return std::equal(prefix_parts.begin(), prefix_parts.end(),
path_parts.begin());
+}
+
+bool paths_conflict(const PathInData& left, const PathInData& right) {
+ return is_path_prefix_of(left, right) || is_path_prefix_of(right, left);
+}
+
+bool starts_with_json_object(std::string_view text) {
+ auto it = std::ranges::find_if_not(text, [](unsigned char ch) { return
std::isspace(ch); });
+ return it != text.end() && *it == '{';
+}
+
+bool root_jsonb_field_to_json_text(const Field& field, std::string* json_text)
{
+ switch (field.get_type()) {
+ case PrimitiveType::TYPE_JSONB: {
+ const auto& jsonb = field.get<PrimitiveType::TYPE_JSONB>();
+ *json_text = JsonbToJson::jsonb_to_json_string(jsonb.get_value(),
jsonb.get_size());
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+bool collect_json_object_text_map(std::string_view json_text, bool
reject_json_null_value,
+ VariantMap* object) {
+ if (!starts_with_json_object(json_text)) {
+ return false;
+ }
+
+ auto parsed = ColumnVariant::create(0, false);
+ ParseConfig config;
+ config.parse_to = ParseConfig::ParseTo::OnlySubcolumns;
+ config.reject_json_null_value = reject_json_null_value;
+ StringRef json_ref {json_text.data(), json_text.size()};
+ parse_json_to_variant(*parsed, json_ref, nullptr, config);
+ parsed->finalize();
+
+ Field parsed_field;
+ parsed->get(0, parsed_field);
+ if (parsed_field.get_type() != PrimitiveType::TYPE_VARIANT) {
+ return false;
+ }
+ const auto& parsed_object =
parsed_field.get<PrimitiveType::TYPE_VARIANT>();
+ if (parsed_object.contains(PathInData())) {
+ return false;
+ }
+ for (const auto& [path, value] : parsed_object) {
+ if (!path.empty()) {
+ object->insert_or_assign(path, value);
+ }
+ }
+ return true;
+}
+
+void collect_materialized_variant_map(const ColumnVariant& variant, size_t
row, VariantMap* object,
+ FieldWithDataType* root_field) {
+ Field field;
+ variant.get(row, field);
+ if (field.get_type() == PrimitiveType::TYPE_VARIANT) {
+ for (const auto& [path, value] :
field.get<PrimitiveType::TYPE_VARIANT>()) {
+ if (path.get_path() == DOC_VALUE_COLUMN_PATH) {
+ continue;
+ }
+ if (path.empty()) {
+ *root_field = value;
+ continue;
+ }
+ object->insert_or_assign(path, value);
+ }
+ }
+
+ DCHECK(!variant.has_doc_value_column(row));
+}
+
+Status collect_variant_patch_map(const ColumnVariant& variant, size_t row,
bool* is_object_patch,
+ VariantMap* object) {
+ object->clear();
+ FieldWithDataType root_field;
+ collect_materialized_variant_map(variant, row, object, &root_field);
+ if (root_field.field.get_type() == PrimitiveType::TYPE_NULL) {
+ *is_object_patch = true;
+ return Status::OK();
+ }
+
+ std::string json_text;
+ if (!root_jsonb_field_to_json_text(root_field.field, &json_text)) {
+ *is_object_patch = false;
+ return Status::OK();
+ }
+ object->clear();
+ *is_object_patch = collect_json_object_text_map(json_text, true, object);
+ return Status::OK();
+}
+
+void collect_variant_base_map(const ColumnVariant& variant, size_t row,
VariantMap* object) {
+ object->clear();
+ FieldWithDataType root_field;
+ collect_materialized_variant_map(variant, row, object, &root_field);
+ std::string json_text;
+ if (root_jsonb_field_to_json_text(root_field.field, &json_text)) {
+ collect_json_object_text_map(json_text, false, object);
+ }
+}
+
+Status insert_variant_field(IColumn& dst_column, const Field& field) {
+ DCHECK(!get_variant_nested_column(dst_column).enable_doc_mode());
+ dst_column.insert(field);
+ return Status::OK();
+}
+
+Status check_variant_object_patch_supported(const IColumn& column) {
+ if (get_variant_nested_column(column).enable_doc_mode()) {
+ return variant_doc_mode_not_supported_status();
+ }
+ return Status::OK();
+}
+
+Status merge_variant_object_patch(const IColumn& old_column, size_t old_row,
+ VariantMap&& patch_object, IColumn&
dst_column) {
+ VariantMap merged_object;
+ if (!old_column.is_null_at(old_row)) {
+ collect_variant_base_map(get_variant_nested_column(old_column),
old_row, &merged_object);
+ }
+ for (const auto& [patch_path, _] : patch_object) {
+ for (auto it = merged_object.begin(); it != merged_object.end();) {
+ if (paths_conflict(patch_path, it->first)) {
+ it = merged_object.erase(it);
+ } else {
+ ++it;
+ }
+ }
+ }
+ for (auto& [patch_path, patch_value] : patch_object) {
+ merged_object.insert_or_assign(patch_path, std::move(patch_value));
+ }
+
+ Field merged_field =
Field::create_field<TYPE_VARIANT>(std::move(merged_object));
+ return insert_variant_field(dst_column, merged_field);
+}
+
+Status insert_variant_object_patch(VariantMap&& patch_object, IColumn&
dst_column) {
+ Field patch_field =
Field::create_field<TYPE_VARIANT>(std::move(patch_object));
+ return insert_variant_field(dst_column, patch_field);
+}
+
+} // namespace
+
+Status mark_variant_patch_paths(const IColumn& patch_column, size_t patch_row,
+ int32_t variant_col_unique_id, BitmapValue*
patch_path_markers) {
+ RETURN_IF_CATCH_EXCEPTION({
+ if (patch_column.is_null_at(patch_row)) {
+ return variant_object_patch_required_status();
+ }
+ RETURN_IF_ERROR(check_variant_object_patch_supported(patch_column));
+
+ bool is_object_patch = false;
+ VariantMap patch_object;
+
RETURN_IF_ERROR(collect_variant_patch_map(get_variant_nested_column(patch_column),
+ patch_row, &is_object_patch,
&patch_object));
+ if (!is_object_patch) {
+ return variant_object_patch_required_status();
+ }
+
+ std::set<VariantPatchMarkerPair> seen_path_markers;
+ for (const auto& [path, _] : patch_object) {
+ auto markers = variant_patch_path_markers(variant_col_unique_id,
path);
Review Comment:
Fixed in the current head. Flexible patch parsing now records empty object
paths, mark_variant_patch_paths can mark them, and
merge_variant_patch_by_path_markers replays them as replacements. Coverage was
added for same-batch replacement and publish-conflict replacement of nested
empty object patches, including removal of stale subpaths like `a.x`.
##########
fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java:
##########
@@ -3213,8 +3212,11 @@ public void validateForFlexiblePartialUpdate() throws
UserException {
throw new UserException("Flexible partial update can only support
table with light_schema_change enabled."
+ " But table " + getName() + "'s property
light_schema_change is false");
}
- if (hasVariantColumns()) {
- throw new UserException("Flexible partial update can only support
table without variant columns.");
+ for (Column column : getBaseSchema()) {
+ if (column.getType().isVariantType() &&
column.getVariantEnableDocMode()) {
Review Comment:
Fixed in the current head. ALTER ENABLE FEATURE now validates existing
VARIANT doc-mode columns before enabling flexible partial update, and
SchemaChangeHandler also revalidates the final locked base schema when the
table already has the skip bitmap column.
##########
be/src/storage/partial_update_info.cpp:
##########
@@ -770,7 +803,7 @@ Status
FlexibleReadPlan::fill_non_primary_key_columns_for_row_store(
auto block_pos = block_start_pos + idx;
auto pos_in_old_block = read_index[segment_pos];
Review Comment:
Fixed in the current head. The row-store path now uses
read_index.find(segment_pos), carries an explicit old_row_exists flag, and
avoids all old-row/delete-sign/VARIANT merge access when no old row was read.
New-key row-store VARIANT patch coverage was added to the regression suite.
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommand.java:
##########
@@ -230,6 +232,33 @@ private void rewriteAlterOpForOlapTable(ConnectContext
ctx, OlapTable table) thr
ops = alterTableOps;
}
+ private void validateAlterVariantColumnsForFlexiblePartialUpdate(OlapTable
table) throws UserException {
+ boolean enableFlexiblePartialUpdate = table.hasSkipBitmapColumn();
Review Comment:
Fixed in the current head. SchemaChangeHandler revalidates VARIANT doc-mode
compatibility against the final base schema while holding the table write lock,
so a concurrent ALTER cannot invalidate the pre-lock AlterTableCommand
validation before metadata commit.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]