zratkai commented on code in PR #5192:
URL: https://github.com/apache/hive/pull/5192#discussion_r1599670314
########## ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactionQueryBuilder.java:
##########
@@ -275,137 +250,105 @@ String build() {
       case DROP:
       default:
     }
-
     return query.toString();
   }

+  protected void getDdlForCreate(StringBuilder query) {
+    defineColumns(query);
+
+    // PARTITIONED BY. Used for parts of minor compaction.
+    if (isPartitioned) {
+      query.append(" PARTITIONED BY (`file_name` STRING) ");
+    }
+
+    // STORED AS / ROW FORMAT SERDE + INPUTFORMAT + OUTPUTFORMAT
+    query.append(" stored as orc");
+
+    // LOCATION
+    if (location != null) {
+      query.append(" LOCATION '").append(HiveStringUtils.escapeHiveCommand(location)).append("'");
+    }
+
+    addTblProperties(query, false, 0);
+  }
+
+  /**
+   * Part of Create operation. All tmp tables are not transactional and are marked as
+   * compaction tables. Additionally...
+   * - Crud compaction temp tables need tblproperty, "compactiontable."
+   * - Minor crud compaction temp tables need bucketing version tblproperty, if table is bucketed.
+   */
+  protected void addTblProperties(StringBuilder query, boolean addBucketingVersion, int bucketingVersion) {
+    Map<String, String> tblProperties = new HashMap<>();
+    tblProperties.put("transactional", "false");
+    tblProperties.put(AcidUtils.COMPACTOR_TABLE_PROPERTY, compactionType.name());
+    if (addBucketingVersion) {
+      tblProperties.put("bucketing_version", String.valueOf(bucketingVersion));
+    }
+    if (sourceTab != null) { // to avoid NPEs, skip this part if sourceTab is null
+      for (Map.Entry<String, String> e : sourceTab.getParameters().entrySet()) {
+        if (e.getKey().startsWith("orc.")) {
+          tblProperties.put(e.getKey(), HiveStringUtils.escapeHiveCommand(e.getValue()));
+        }
+      }
+    }
+    addTblProperties(query, tblProperties);
+  }
+
+  protected void addTblProperties(StringBuilder query, Map<String, String> tblProperties) {
+    // add TBLPROPERTIES clause to query
+    boolean isFirst;
+    query.append(" TBLPROPERTIES (");
+    isFirst = true;
+    for (Map.Entry<String, String> property : tblProperties.entrySet()) {
+      if (!isFirst) {
+        query.append(", ");
+      }
+      query.append("'").append(property.getKey()).append("'='").append(property.getValue()).append("'");
+      isFirst = false;
+    }
+    query.append(")");
+  }
+
   private void buildAddClauseForAlter(StringBuilder query) {
     if (validWriteIdList == null || dir == null) {
       query.setLength(0);
       return; // avoid NPEs, don't throw an exception but return an empty query
     }
-    long minWriteID =
-        validWriteIdList.getMinOpenWriteId() == null ? 1 : validWriteIdList.getMinOpenWriteId();
+    long minWriteID = validWriteIdList.getMinOpenWriteId() == null ? 1 : validWriteIdList.getMinOpenWriteId();
     long highWatermark = validWriteIdList.getHighWatermark();
     List<AcidUtils.ParsedDelta> deltas = dir.getCurrentDirectories().stream().filter(
-        delta -> delta.isDeleteDelta() == isDeleteDelta && delta.getMaxWriteId() <= highWatermark
-            && delta.getMinWriteId() >= minWriteID)
+        delta -> delta.isDeleteDelta() == isDeleteDelta && delta.getMaxWriteId() <= highWatermark && delta.getMinWriteId() >= minWriteID)
         .collect(Collectors.toList());
     if (deltas.isEmpty()) {
       query.setLength(0); // no alter query needed; clear StringBuilder
       return;
     }
     query.append(" add ");
-    deltas.forEach(delta -> query.append("partition (file_name='")
-        .append(delta.getPath().getName()).append("')"
-            + " location '").append(delta.getPath()).append("' "));
-  }
-
-
-  private void buildSelectClauseForInsert(StringBuilder query) {
-    // Need list of columns for major crud, mmmajor partitioned, mmminor
-    List<FieldSchema> cols;
-    if (CompactionType.REBALANCE.equals(compactionType) ||
-        CompactionType.MAJOR.equals(compactionType) && (!insertOnly || sourcePartition != null) ||
-        CompactionType.MINOR.equals(compactionType) && insertOnly) {
-      if (sourceTab == null) {
-        return; // avoid NPEs, don't throw an exception but skip this part of the query
-      }
-      cols = sourceTab.getSd().getCols();
-    } else {
-      cols = null;
-    }
-    switch (compactionType) {
-      case REBALANCE: {
-        query.append("0, t2.writeId, t2.rowId DIV CEIL(numRows / ")
-            .append(numberOfBuckets)
-            .append("), t2.rowId, t2.writeId, t2.data from (select ")
-            .append("count(ROW__ID.writeId) over() as numRows, ");
-        if (StringUtils.isNotBlank(orderByClause)) {
-          // in case of reordering the data the writeids cannot be kept.
-          query.append("MAX(ROW__ID.writeId) over() as writeId, row_number() OVER (")
-              .append(orderByClause);
-        } else {
-          query.append("ROW__ID.writeId as writeId, row_number() OVER (order by ROW__ID.writeId ASC, ROW__ID.bucketId ASC, ROW__ID.rowId ASC");
-        }
-        query.append(") - 1 AS rowId, NAMED_STRUCT(");
-        for (int i = 0; i < cols.size(); ++i) {
-          query.append(i == 0 ? "'" : ", '").append(cols.get(i).getName()).append("', `")
-              .append(cols.get(i).getName()).append("`");
-        }
-        query.append(") as data");
-        break;
-      }
-      case MAJOR: {
-        if (insertOnly) {
-          if (sourcePartition != null) { //mmmajor and partitioned
-            appendColumns(query, cols, false);
-          } else { // mmmajor and unpartitioned
-            query.append("*");
-          }
-        } else {
-          query.append(
-              "validate_acid_sort_order(ROW__ID.writeId, ROW__ID.bucketId, ROW__ID.rowId), "
-                  + "ROW__ID.writeId, ROW__ID.bucketId, ROW__ID.rowId, ROW__ID.writeId, "
-                  + "NAMED_STRUCT(");
-          appendColumns(query, cols, true);
-          query.append(") ");
-        }
-        break;
-      }
-      case MINOR: {
-        if (insertOnly) {
-          appendColumns(query, cols, false);
-        } else {
-          query.append(
-              "`operation`, `originalTransaction`, `bucket`, `rowId`, `currentTransaction`, `row`");
-        }
-        break;
-      }
-    }
+    deltas.forEach(
+        delta -> query.append("partition (file_name='").append(delta.getPath().getName()).append("')" + " location '")
            .append(delta.getPath()).append("' "));

Review Comment:
   Please avoid the concatenation : append("')" + " location '")
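For illustration, a minimal sketch of the change the comment asks for, using the same variables as the hunk above (a sketch of the suggestion, not the committed fix): the two adjacent literals are merged, so no string concatenation remains inside the append chain.

    // The literals "')" and " location '" fold into the single literal
    // "') location '", so each append call takes exactly one argument.
    deltas.forEach(delta -> query.append("partition (file_name='")
        .append(delta.getPath().getName())
        .append("') location '")
        .append(delta.getPath())
        .append("' "));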
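Likewise, for context on the new TBLPROPERTIES helper added earlier in the hunk, a self-contained, runnable sketch of the clause it renders. The class name, the main method, and the literal key "compactiontable" (standing in for AcidUtils.COMPACTOR_TABLE_PROPERTY, as named in the javadoc) are illustrative assumptions:

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class TblPropertiesDemo {

        // Mirrors the PR's addTblProperties(StringBuilder, Map): renders each
        // entry as 'key'='value', comma separated, inside TBLPROPERTIES (...).
        static void addTblProperties(StringBuilder query, Map<String, String> tblProperties) {
            query.append(" TBLPROPERTIES (");
            boolean isFirst = true;
            for (Map.Entry<String, String> property : tblProperties.entrySet()) {
                if (!isFirst) {
                    query.append(", ");
                }
                query.append("'").append(property.getKey()).append("'='")
                     .append(property.getValue()).append("'");
                isFirst = false;
            }
            query.append(")");
        }

        public static void main(String[] args) {
            // LinkedHashMap only so the demo output is deterministic; the PR uses a HashMap.
            Map<String, String> props = new LinkedHashMap<>();
            props.put("transactional", "false");
            props.put("compactiontable", "MAJOR");

            StringBuilder query = new StringBuilder("CREATE TEMPORARY TABLE tmp_compactor_demo");
            addTblProperties(query, props);
            System.out.println(query);
            // prints: CREATE TEMPORARY TABLE tmp_compactor_demo
            //         TBLPROPERTIES ('transactional'='false', 'compactiontable'='MAJOR')
        }
    }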
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org