zratkai commented on code in PR #5192:
URL: https://github.com/apache/hive/pull/5192#discussion_r1599670314
########## ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactionQueryBuilder.java:
##########
@@ -275,137 +250,105 @@ String build() {
       case DROP:
       default:
     }
-
     return query.toString();
   }

+  protected void getDdlForCreate(StringBuilder query) {
+    defineColumns(query);
+
+    // PARTITIONED BY. Used for parts of minor compaction.
+    if (isPartitioned) {
+      query.append(" PARTITIONED BY (`file_name` STRING) ");
+    }
+
+    // STORED AS / ROW FORMAT SERDE + INPUTFORMAT + OUTPUTFORMAT
+    query.append(" stored as orc");
+
+    // LOCATION
+    if (location != null) {
+      query.append(" LOCATION '").append(HiveStringUtils.escapeHiveCommand(location)).append("'");
+    }
+
+    addTblProperties(query, false, 0);
+  }
+
+  /**
+   * Part of Create operation. All tmp tables are not transactional and are marked as
+   * compaction tables. Additionally...
+   * - Crud compaction temp tables need tblproperty, "compactiontable."
+   * - Minor crud compaction temp tables need bucketing version tblproperty, if table is bucketed.
+   */
+  protected void addTblProperties(StringBuilder query, boolean addBucketingVersion, int bucketingVersion) {
+    Map<String, String> tblProperties = new HashMap<>();
+    tblProperties.put("transactional", "false");
+    tblProperties.put(AcidUtils.COMPACTOR_TABLE_PROPERTY, compactionType.name());
+    if (addBucketingVersion) {
+      tblProperties.put("bucketing_version", String.valueOf(bucketingVersion));
+    }
+    if (sourceTab != null) { // to avoid NPEs, skip this part if sourceTab is null
+      for (Map.Entry<String, String> e : sourceTab.getParameters().entrySet()) {
+        if (e.getKey().startsWith("orc.")) {
+          tblProperties.put(e.getKey(), HiveStringUtils.escapeHiveCommand(e.getValue()));
+        }
+      }
+    }
+    addTblProperties(query, tblProperties);
+  }
+
+  protected void addTblProperties(StringBuilder query, Map<String, String> tblProperties) {
+    // add TBLPROPERTIES clause to query
+    boolean isFirst;
+    query.append(" TBLPROPERTIES (");
+    isFirst = true;
+    for (Map.Entry<String, String> property : tblProperties.entrySet()) {
+      if (!isFirst) {
+        query.append(", ");
+      }
+      query.append("'").append(property.getKey()).append("'='").append(property.getValue()).append("'");
+      isFirst = false;
+    }
+    query.append(")");
+  }
+
   private void buildAddClauseForAlter(StringBuilder query) {
     if (validWriteIdList == null || dir == null) {
       query.setLength(0);
       return; // avoid NPEs, don't throw an exception but return an empty query
     }
-    long minWriteID =
-        validWriteIdList.getMinOpenWriteId() == null ? 1 : validWriteIdList.getMinOpenWriteId();
+    long minWriteID = validWriteIdList.getMinOpenWriteId() == null ? 1 : validWriteIdList.getMinOpenWriteId();
     long highWatermark = validWriteIdList.getHighWatermark();
     List<AcidUtils.ParsedDelta> deltas = dir.getCurrentDirectories().stream().filter(
-        delta -> delta.isDeleteDelta() == isDeleteDelta && delta.getMaxWriteId() <= highWatermark
-            && delta.getMinWriteId() >= minWriteID)
+        delta -> delta.isDeleteDelta() == isDeleteDelta && delta.getMaxWriteId() <= highWatermark && delta.getMinWriteId() >= minWriteID)
         .collect(Collectors.toList());
     if (deltas.isEmpty()) {
       query.setLength(0); // no alter query needed; clear StringBuilder
       return;
     }
     query.append(" add ");
-    deltas.forEach(delta -> query.append("partition (file_name='")
-        .append(delta.getPath().getName()).append("')"
-            + " location '").append(delta.getPath()).append("' "));
-  }
-
-
-  private void buildSelectClauseForInsert(StringBuilder query) {
-    // Need list of columns for major crud, mmmajor partitioned, mmminor
-    List<FieldSchema> cols;
-    if (CompactionType.REBALANCE.equals(compactionType) ||
-        CompactionType.MAJOR.equals(compactionType) && (!insertOnly || sourcePartition != null) ||
-        CompactionType.MINOR.equals(compactionType) && insertOnly) {
-      if (sourceTab == null) {
-        return; // avoid NPEs, don't throw an exception but skip this part of the query
-      }
-      cols = sourceTab.getSd().getCols();
-    } else {
-      cols = null;
-    }
-    switch (compactionType) {
-      case REBALANCE: {
-        query.append("0, t2.writeId, t2.rowId DIV CEIL(numRows / ")
-            .append(numberOfBuckets)
-            .append("), t2.rowId, t2.writeId, t2.data from (select ")
-            .append("count(ROW__ID.writeId) over() as numRows, ");
-        if (StringUtils.isNotBlank(orderByClause)) {
-          // in case of reordering the data the writeids cannot be kept.
-          query.append("MAX(ROW__ID.writeId) over() as writeId, row_number() OVER (")
-              .append(orderByClause);
-        } else {
-          query.append("ROW__ID.writeId as writeId, row_number() OVER (order by ROW__ID.writeId ASC, ROW__ID.bucketId ASC, ROW__ID.rowId ASC");
-        }
-        query.append(") - 1 AS rowId, NAMED_STRUCT(");
-        for (int i = 0; i < cols.size(); ++i) {
-          query.append(i == 0 ? "'" : ", '").append(cols.get(i).getName()).append("', `")
-              .append(cols.get(i).getName()).append("`");
-        }
-        query.append(") as data");
-        break;
-      }
-      case MAJOR: {
-        if (insertOnly) {
-          if (sourcePartition != null) { //mmmajor and partitioned
-            appendColumns(query, cols, false);
-          } else { // mmmajor and unpartitioned
-            query.append("*");
-          }
-        } else {
-          query.append(
-              "validate_acid_sort_order(ROW__ID.writeId, ROW__ID.bucketId, ROW__ID.rowId), "
-                  + "ROW__ID.writeId, ROW__ID.bucketId, ROW__ID.rowId, ROW__ID.writeId, "
-                  + "NAMED_STRUCT(");
-          appendColumns(query, cols, true);
-          query.append(") ");
-        }
-        break;
-      }
-      case MINOR: {
-        if (insertOnly) {
-          appendColumns(query, cols, false);
-        } else {
-          query.append(
-              "`operation`, `originalTransaction`, `bucket`, `rowId`, `currentTransaction`, `row`");
-        }
-        break;
-      }
-    }
+    deltas.forEach(
+        delta -> query.append("partition (file_name='").append(delta.getPath().getName()).append("')" + " location '")
            .append(delta.getPath()).append("' "));

Review Comment:
   Please avoid the concatenation : append("')" + " location '")
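For illustration, a minimal sketch of the change the comment asks for, using the same variables as the hunk above (a sketch of the suggestion, not the committed fix): the two adjacent literals are merged, so no string concatenation remains inside the append chain.

    // The literals "')" and " location '" fold into the single literal
    // "') location '", so each append call takes exactly one argument.
    deltas.forEach(delta -> query.append("partition (file_name='")
        .append(delta.getPath().getName())
        .append("') location '")
        .append(delta.getPath())
        .append("' "));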
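Likewise, for context on the new TBLPROPERTIES helper added earlier in the hunk, a self-contained, runnable sketch of the clause it renders. The class name, the main method, and the literal key "compactiontable" (standing in for AcidUtils.COMPACTOR_TABLE_PROPERTY, as named in the javadoc) are illustrative assumptions:

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class TblPropertiesDemo {

        // Mirrors the PR's addTblProperties(StringBuilder, Map): renders each
        // entry as 'key'='value', comma separated, inside TBLPROPERTIES (...).
        static void addTblProperties(StringBuilder query, Map<String, String> tblProperties) {
            query.append(" TBLPROPERTIES (");
            boolean isFirst = true;
            for (Map.Entry<String, String> property : tblProperties.entrySet()) {
                if (!isFirst) {
                    query.append(", ");
                }
                query.append("'").append(property.getKey()).append("'='")
                     .append(property.getValue()).append("'");
                isFirst = false;
            }
            query.append(")");
        }

        public static void main(String[] args) {
            // LinkedHashMap only so the demo output is deterministic; the PR uses a HashMap.
            Map<String, String> props = new LinkedHashMap<>();
            props.put("transactional", "false");
            props.put("compactiontable", "MAJOR");

            StringBuilder query = new StringBuilder("CREATE TEMPORARY TABLE tmp_compactor_demo");
            addTblProperties(query, props);
            System.out.println(query);
            // prints: CREATE TEMPORARY TABLE tmp_compactor_demo
            //         TBLPROPERTIES ('transactional'='false', 'compactiontable'='MAJOR')
        }
    }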
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org