HIVE-11497: Make sure --orcfiledump utility includes OrcRecordUpdater.AcidStats (Prasanth Jayachandran reviewed by Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3e21a6d4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3e21a6d4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3e21a6d4 Branch: refs/heads/spark Commit: 3e21a6d44971feb91ab26ec6dbf8ee207683ada1 Parents: f2ede0e Author: Prasanth Jayachandran <j.prasant...@gmail.com> Authored: Tue Oct 27 23:44:51 2015 -0500 Committer: Prasanth Jayachandran <j.prasant...@gmail.com> Committed: Tue Oct 27 23:44:51 2015 -0500 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/io/orc/FileDump.java | 4 +++ .../hadoop/hive/ql/io/orc/JsonFileDump.java | 6 ++++ .../hadoop/hive/ql/io/orc/OrcRecordUpdater.java | 34 +++++++++++++------- ql/src/test/resources/orc-file-dump.json | 3 +- 4 files changed, 35 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/3e21a6d4/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java index 9c6538f..0e9667a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java @@ -249,6 +249,10 @@ public final class FileDump { System.out.println("\nFile length: " + fileLen + " bytes"); System.out.println("Padding length: " + paddedBytes + " bytes"); System.out.println("Padding ratio: " + format.format(percentPadding) + "%"); + OrcRecordUpdater.AcidStats acidStats = OrcRecordUpdater.parseAcidStats(reader); + if (acidStats != null) { + System.out.println("ACID stats:" + acidStats); + } rows.close(); if (files.size() > 1) { System.out.println(Strings.repeat("=", 80) + "\n"); 
http://git-wip-us.apache.org/repos/asf/hive/blob/3e21a6d4/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java index 02e01b4..7f673dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java @@ -167,6 +167,12 @@ public class JsonFileDump { writer.key("fileLength").value(fileLen); writer.key("paddingLength").value(paddedBytes); writer.key("paddingRatio").value(percentPadding); + OrcRecordUpdater.AcidStats acidStats = OrcRecordUpdater.parseAcidStats(reader); + if (acidStats != null) { + writer.key("numInserts").value(acidStats.inserts); + writer.key("numDeletes").value(acidStats.deletes); + writer.key("numUpdates").value(acidStats.updates); + } writer.key("status").value("OK"); rows.close(); http://git-wip-us.apache.org/repos/asf/hive/blob/3e21a6d4/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java index 2220b8e..01374a7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java @@ -45,7 +45,6 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; /** * A RecordUpdater where the files are stored as ORC. 
@@ -128,6 +127,15 @@ public class OrcRecordUpdater implements RecordUpdater { builder.append(deletes); return builder.toString(); } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append(" inserts: ").append(inserts); + builder.append(" updates: ").append(updates); + builder.append(" deletes: ").append(deletes); + return builder.toString(); + } } static Path getSideFile(Path main) { @@ -448,17 +456,21 @@ public class OrcRecordUpdater implements RecordUpdater { * {@link KeyIndexBuilder} creates these */ static AcidStats parseAcidStats(Reader reader) { - String statsSerialized; - try { - ByteBuffer val = - reader.getMetadataValue(OrcRecordUpdater.ACID_STATS) - .duplicate(); - statsSerialized = utf8Decoder.decode(val).toString(); - } catch (CharacterCodingException e) { - throw new IllegalArgumentException("Bad string encoding for " + - OrcRecordUpdater.ACID_STATS, e); + if (reader.hasMetadataValue(OrcRecordUpdater.ACID_STATS)) { + String statsSerialized; + try { + ByteBuffer val = + reader.getMetadataValue(OrcRecordUpdater.ACID_STATS) + .duplicate(); + statsSerialized = utf8Decoder.decode(val).toString(); + } catch (CharacterCodingException e) { + throw new IllegalArgumentException("Bad string encoding for " + + OrcRecordUpdater.ACID_STATS, e); + } + return new AcidStats(statsSerialized); + } else { + return null; } - return new AcidStats(statsSerialized); } static class KeyIndexBuilder implements OrcFile.WriterCallback { http://git-wip-us.apache.org/repos/asf/hive/blob/3e21a6d4/ql/src/test/resources/orc-file-dump.json ---------------------------------------------------------------------- diff --git a/ql/src/test/resources/orc-file-dump.json b/ql/src/test/resources/orc-file-dump.json index 646dfe5..25fd63b 100644 --- a/ql/src/test/resources/orc-file-dump.json +++ b/ql/src/test/resources/orc-file-dump.json @@ -1350,5 +1350,6 @@ ], "fileLength": 273300, "paddingLength": 0, - "paddingRatio": 0 + "paddingRatio": 0, + 
"status": "OK" }