simhadri-g commented on code in PR #4431:
URL: https://github.com/apache/hive/pull/4431#discussion_r1265160586


##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -452,65 +455,70 @@ public boolean canSetColStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsT
   }
 
   @Override
-  public boolean setColStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable,
-      List<ColumnStatistics> colStats) {
+  public boolean setColStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable, List<ColumnStatistics> colStats,
+      ColumnStatsDesc columnStatsDesc) {
     Table tbl = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
     String snapshotId = String.format("%s-STATS-%d", tbl.name(), tbl.currentSnapshot().snapshotId());
-    invalidateStats(getStatsPath(tbl));
-    byte[] serializeColStats = SerializationUtils.serialize((Serializable) colStats);
-    try (PuffinWriter writer = Puffin.write(tbl.io().newOutputFile(getStatsPath(tbl).toString()))
-        .createdBy(Constants.HIVE_ENGINE).build()) {
-      writer.add(
-          new Blob(
-              tbl.name() + "-" + snapshotId,
-              ImmutableList.of(1),
-              tbl.currentSnapshot().snapshotId(),
-              tbl.currentSnapshot().sequenceNumber(),
-              ByteBuffer.wrap(serializeColStats),
-              PuffinCompressionCodec.NONE,
-              ImmutableMap.of()));
-      writer.finish();
-      return true;
-    } catch (IOException e) {
-      LOG.error(String.valueOf(e));
+    try {
+      boolean rewriteStats = removeStatsIfExists(tbl);
+      if (!rewriteStats) {
+        checkAndMergeStats(colStats.get(0), tbl);
+      }
+      byte[] serializeColStats = SerializationUtils.serialize((Serializable) colStats);

Review Comment:
   done



##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -536,17 +544,30 @@ private String getStatsSource() {
   }
 
   private Path getStatsPath(Table table) {
-    return new Path(table.location() + STATS + table.name() + table.currentSnapshot().snapshotId());
+    return getStatsPath(table, table.currentSnapshot().snapshotId());
   }
 
-  private void invalidateStats(Path statsPath) {
-    try {
-      FileSystem fs = statsPath.getFileSystem(conf);
-      if (fs.exists(statsPath)) {
-        fs.delete(statsPath, true);
+  private Path getStatsPath(Table table, long snapshotId) {
+    return new Path(table.location() + STATS + table.name() + snapshotId);
+  }
+
+  private boolean removeStatsIfExists(Table tbl) throws IOException {

Review Comment:
   Done



##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -536,17 +544,30 @@ private String getStatsSource() {
   }
 
   private Path getStatsPath(Table table) {
-    return new Path(table.location() + STATS + table.name() + table.currentSnapshot().snapshotId());
+    return getStatsPath(table, table.currentSnapshot().snapshotId());
   }
 
-  private void invalidateStats(Path statsPath) {
-    try {
-      FileSystem fs = statsPath.getFileSystem(conf);
-      if (fs.exists(statsPath)) {
-        fs.delete(statsPath, true);
+  private Path getStatsPath(Table table, long snapshotId) {
+    return new Path(table.location() + STATS + table.name() + snapshotId);
+  }
+
+  private boolean removeStatsIfExists(Table tbl) throws IOException {
+    Path statsPath = getStatsPath(tbl);
+    FileSystem fs = statsPath.getFileSystem(conf);
+    if (fs.exists(statsPath)) {
+      // Analyze table and stats updater thread
+      return fs.delete(statsPath, true);
+    }
+    return false;
+  }
+
+  private void checkAndMergeStats(ColumnStatistics statsObjNew, Table tbl) throws InvalidObjectException {

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to