kokila-19 commented on code in PR #6138:
URL: https://github.com/apache/hive/pull/6138#discussion_r2452863418
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java:
##########
@@ -217,28 +222,83 @@ private void validateCatalogConfigsDefined() {
}
}
+ /**
+ * Persists the table's write sort order based on the HMS property
'default-sort-order'
+ * that is populated by the DDL layer.
+ * <p>
+ * Behaviour:
+ * - If the JSON represents Z-order, we remove DEFAULT_SORT_ORDER
+ * as Iceberg does not have Z-order support in its spec.
+ * So, we persist Z-order metadata in {@link
org.apache.iceberg.mr.InputFormatConfig#SORT_ORDER}
+ * and {@link org.apache.iceberg.mr.InputFormatConfig#SORT_COLUMNS} to be
used by Hive Writer.
+ * <p>
+ * - Otherwise, the JSON is a list of SortFields; we convert it to Iceberg
+ * SortOrder JSON and keep it in DEFAULT_SORT_ORDER for Iceberg to use it.
+ */
private void setSortOrder(org.apache.hadoop.hive.metastore.api.Table
hmsTable, Schema schema,
Properties properties) {
- String sortOderJSONString =
hmsTable.getParameters().get(TableProperties.DEFAULT_SORT_ORDER);
- SortFields sortFields = null;
- if (!Strings.isNullOrEmpty(sortOderJSONString)) {
- try {
- sortFields = JSON_OBJECT_MAPPER.reader().readValue(sortOderJSONString,
SortFields.class);
- } catch (Exception e) {
- LOG.warn("Can not read write order json: {}", sortOderJSONString, e);
- return;
- }
+ String sortOrderJSONString =
hmsTable.getParameters().get(TableProperties.DEFAULT_SORT_ORDER);
+ if (Strings.isNullOrEmpty(sortOrderJSONString)) {
+ return;
+ }
+
+ if (isZOrderJSON(sortOrderJSONString)) {
+ properties.remove(TableProperties.DEFAULT_SORT_ORDER);
+ setZOrderSortOrder(sortOrderJSONString, properties);
+ return;
+ }
+
+ try {
+ SortFields sortFields =
JSON_OBJECT_MAPPER.reader().readValue(sortOrderJSONString, SortFields.class);
if (sortFields != null && !sortFields.getSortFields().isEmpty()) {
- SortOrder.Builder sortOderBuilder = SortOrder.builderFor(schema);
+ SortOrder.Builder sortOrderBuilder = SortOrder.builderFor(schema);
sortFields.getSortFields().forEach(fieldDesc -> {
NullOrder nullOrder = fieldDesc.getNullOrdering() ==
NullOrdering.NULLS_FIRST ?
- NullOrder.NULLS_FIRST : NullOrder.NULLS_LAST;
+ NullOrder.NULLS_FIRST : NullOrder.NULLS_LAST;
SortDirection sortDirection = fieldDesc.getDirection() ==
SortFieldDesc.SortDirection.ASC ?
- SortDirection.ASC : SortDirection.DESC;
- sortOderBuilder.sortBy(fieldDesc.getColumnName(), sortDirection,
nullOrder);
+ SortDirection.ASC : SortDirection.DESC;
+ sortOrderBuilder.sortBy(fieldDesc.getColumnName(), sortDirection,
nullOrder);
});
- properties.put(TableProperties.DEFAULT_SORT_ORDER,
SortOrderParser.toJson(sortOderBuilder.build()));
+ properties.put(TableProperties.DEFAULT_SORT_ORDER,
SortOrderParser.toJson(sortOrderBuilder.build()));
}
+ } catch (Exception e) {
+ LOG.warn("Can not read write order json: {}", sortOrderJSONString);
+ }
+ }
+
+ /**
+ * Configures the Z-order sort order metadata in the given properties
+ * based on the specified Z-order fields.
+ *
+ * @param jsonString the JSON string representing sort orders
+ * @param properties the Properties object to store sort order metadata
+ */
+ private void setZOrderSortOrder(String jsonString, Properties properties) {
+ try {
+ ZOrderFields zorderFields =
JSON_OBJECT_MAPPER.reader().readValue(jsonString, ZOrderFields.class);
+ if (zorderFields != null && !zorderFields.getZOrderFields().isEmpty()) {
+ List<String> columnNames = zorderFields.getZOrderFields().stream()
+ .map(ZOrderFieldDesc::getColumnName)
+ .collect(Collectors.toList());
+
+ LOG.info("Setting Z-order sort order for columns: {}", columnNames);
+
+ properties.put(SORT_ORDER, "ZORDER");
Review Comment:
Lexical sorting: uses Iceberg's native
[DEFAULT_SORT_ORDER](https://github.com/apache/iceberg/blob/cd097eb7e94686854f8b0cb42c77d3d1362a9ab8/core/src/main/java/org/apache/iceberg/TableProperties.java#L66)
property, which is stored in the Iceberg table spec/metadata.
Z-order: uses the HMS table properties SORT_ORDER and SORT_COLUMNS, because
Z-order is not part of the Iceberg spec.
SORT_ORDER is only read by the check below, to determine whether Z-order is
configured:
`if ("ZORDER".equalsIgnoreCase(props.getOrDefault(SORT_ORDER, "")))`
If SORT_ORDER is not present, we use Iceberg's native sort order
(DEFAULT_SORT_ORDER) rather than a separate "LEXICAL" value. Adding a LEXICAL
value that mirrors DEFAULT_SORT_ORDER would only duplicate data, so an enum
might not be a good fit for this scenario.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]