deniskuzZ commented on code in PR #6138:
URL: https://github.com/apache/hive/pull/6138#discussion_r2454869606
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java:
##########
@@ -217,28 +222,83 @@ private void validateCatalogConfigsDefined() {
}
}
+ /**
+ * Persists the table's write sort order based on the HMS property
'default-sort-order'
+ * that is populated by the DDL layer.
+ * <p>
+ * Behaviour:
+ * - If the JSON represents Z-order, we remove DEFAULT_SORT_ORDER
+ * as Iceberg does not have Z-order support in its spec.
+ * So, we persist Z-order metadata in {@link
org.apache.iceberg.mr.InputFormatConfig#SORT_ORDER}
+ * and {@link org.apache.iceberg.mr.InputFormatConfig#SORT_COLUMNS} to be
used by Hive Writer.
+ * <p>
+ * - Otherwise, the JSON is a list of SortFields; we convert it to Iceberg
+ * SortOrder JSON and keep it in DEFAULT_SORT_ORDER for Iceberg to use it.
+ */
private void setSortOrder(org.apache.hadoop.hive.metastore.api.Table
hmsTable, Schema schema,
Properties properties) {
- String sortOderJSONString =
hmsTable.getParameters().get(TableProperties.DEFAULT_SORT_ORDER);
- SortFields sortFields = null;
- if (!Strings.isNullOrEmpty(sortOderJSONString)) {
- try {
- sortFields = JSON_OBJECT_MAPPER.reader().readValue(sortOderJSONString,
SortFields.class);
- } catch (Exception e) {
- LOG.warn("Can not read write order json: {}", sortOderJSONString, e);
- return;
- }
+ String sortOrderJSONString =
hmsTable.getParameters().get(TableProperties.DEFAULT_SORT_ORDER);
+ if (Strings.isNullOrEmpty(sortOrderJSONString)) {
+ return;
+ }
+
+ if (isZOrderJSON(sortOrderJSONString)) {
+ properties.remove(TableProperties.DEFAULT_SORT_ORDER);
+ setZOrderSortOrder(sortOrderJSONString, properties);
+ return;
+ }
+
+ try {
+ SortFields sortFields =
JSON_OBJECT_MAPPER.reader().readValue(sortOrderJSONString, SortFields.class);
if (sortFields != null && !sortFields.getSortFields().isEmpty()) {
- SortOrder.Builder sortOderBuilder = SortOrder.builderFor(schema);
+ SortOrder.Builder sortOrderBuilder = SortOrder.builderFor(schema);
sortFields.getSortFields().forEach(fieldDesc -> {
NullOrder nullOrder = fieldDesc.getNullOrdering() ==
NullOrdering.NULLS_FIRST ?
- NullOrder.NULLS_FIRST : NullOrder.NULLS_LAST;
+ NullOrder.NULLS_FIRST : NullOrder.NULLS_LAST;
SortDirection sortDirection = fieldDesc.getDirection() ==
SortFieldDesc.SortDirection.ASC ?
- SortDirection.ASC : SortDirection.DESC;
- sortOderBuilder.sortBy(fieldDesc.getColumnName(), sortDirection,
nullOrder);
+ SortDirection.ASC : SortDirection.DESC;
+ sortOrderBuilder.sortBy(fieldDesc.getColumnName(), sortDirection,
nullOrder);
});
- properties.put(TableProperties.DEFAULT_SORT_ORDER,
SortOrderParser.toJson(sortOderBuilder.build()));
+ properties.put(TableProperties.DEFAULT_SORT_ORDER,
SortOrderParser.toJson(sortOrderBuilder.build()));
}
+ } catch (Exception e) {
+ LOG.warn("Can not read write order json: {}", sortOrderJSONString);
+ }
+ }
+
+ /**
+ * Configures the Z-order sort order metadata in the given properties
+ * based on the specified Z-order fields.
+ *
+ * @param jsonString the JSON string representing sort orders
+ * @param properties the Properties object to store sort order metadata
+ */
+ private void setZOrderSortOrder(String jsonString, Properties properties) {
+ try {
+ ZOrderFields zorderFields =
JSON_OBJECT_MAPPER.reader().readValue(jsonString, ZOrderFields.class);
+ if (zorderFields != null && !zorderFields.getZOrderFields().isEmpty()) {
+ List<String> columnNames = zorderFields.getZOrderFields().stream()
+ .map(ZOrderFieldDesc::getColumnName)
+ .collect(Collectors.toList());
+
+ LOG.info("Setting Z-order sort order for columns: {}", columnNames);
+
+ properties.put(SORT_ORDER, "ZORDER");
Review Comment:
I don't follow. The enum is used to describe the list of SortTypes.
````
properties.put(SORT_ORDER, SortType.ZORDER.name())
````
and later
````
if (SortType.ZORDER.name().equalsIgnoreCase(props.getOrDefault(SORT_TYPE,
""))) {
````
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]