[ https://issues.apache.org/jira/browse/HIVE-24884?focusedWorklogId=774350&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-774350 ]
ASF GitHub Bot logged work on HIVE-24884: ----------------------------------------- Author: ASF GitHub Bot Created on: 25/May/22 04:14 Start Date: 25/May/22 04:14 Worklog Time Spent: 10m Work Description: maheshk114 commented on code in PR #3293: URL: https://github.com/apache/hive/pull/3293#discussion_r876521344 ########## ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/DumpMetaData.java: ########## @@ -35,33 +40,64 @@ import java.util.Arrays; import java.util.List; +@JsonIgnoreProperties(ignoreUnknown = true) public class DumpMetaData { // wrapper class for reading and writing metadata about a dump // responsible for _dumpmetadata files public static final String DUMP_METADATA = "_dumpmetadata"; + + // New version of dump metadata file to store top level dumpmetadata content in JSON format + public static final String DUMP_METADATA_V2 = "_dumpmetadata_v2"; private static final Logger LOG = LoggerFactory.getLogger(DumpMetaData.class); + private static ObjectMapper JSON_OBJECT_MAPPER = new ObjectMapper(); // Thread-safe. + @JsonProperty private DumpType dumpType; + @JsonProperty private Long eventFrom = null; + @JsonProperty private Long eventTo = null; + @JsonProperty private Path cmRoot; + @JsonProperty private String payload = null; - private ReplScope replScope = null; - - private boolean initialized = false; - private final Path dumpFile; - private final HiveConf hiveConf; + @JsonProperty private Long dumpExecutionId; + @JsonProperty private boolean replScopeModified = false; + @JsonProperty + private String replScopeStr = null; + //Ignore rest of the properties + @JsonIgnore + private ReplScope replScope = null; + @JsonIgnore + private Path dumpFile; + @JsonIgnore + private final HiveConf hiveConf; + @JsonIgnore + private boolean isTopLevel; + @JsonIgnore + private Path dumpRoot; + @JsonIgnore + private boolean initialized = false; + + public DumpMetaData() { + //to be instantiated by JSON ObjectMapper. 
+ hiveConf = null; + } public DumpMetaData(Path dumpRoot, HiveConf hiveConf) { - this.hiveConf = hiveConf; - dumpFile = new Path(dumpRoot, DUMP_METADATA); + this(dumpRoot, hiveConf, false); } + public DumpMetaData(Path dumpRoot, HiveConf hiveConf, boolean isTopLevel) { + this.dumpRoot = dumpRoot; + this.hiveConf = hiveConf; + this.isTopLevel = isTopLevel; + } public DumpMetaData(Path dumpRoot, DumpType lvl, Long eventFrom, Long eventTo, Path cmRoot, Review Comment: Where is this used now? Is it for dumping to normal files? ########## ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/DumpMetaData.java: ########## @@ -35,33 +40,64 @@ import java.util.Arrays; import java.util.List; +@JsonIgnoreProperties(ignoreUnknown = true) public class DumpMetaData { // wrapper class for reading and writing metadata about a dump // responsible for _dumpmetadata files public static final String DUMP_METADATA = "_dumpmetadata"; + + // New version of dump metadata file to store top level dumpmetadata content in JSON format + public static final String DUMP_METADATA_V2 = "_dumpmetadata_v2"; private static final Logger LOG = LoggerFactory.getLogger(DumpMetaData.class); + private static ObjectMapper JSON_OBJECT_MAPPER = new ObjectMapper(); // Thread-safe. 
+ @JsonProperty private DumpType dumpType; + @JsonProperty private Long eventFrom = null; + @JsonProperty private Long eventTo = null; + @JsonProperty private Path cmRoot; + @JsonProperty private String payload = null; - private ReplScope replScope = null; - - private boolean initialized = false; - private final Path dumpFile; - private final HiveConf hiveConf; + @JsonProperty private Long dumpExecutionId; + @JsonProperty private boolean replScopeModified = false; + @JsonProperty + private String replScopeStr = null; + //Ignore rest of the properties + @JsonIgnore + private ReplScope replScope = null; + @JsonIgnore + private Path dumpFile; + @JsonIgnore + private final HiveConf hiveConf; + @JsonIgnore + private boolean isTopLevel; + @JsonIgnore + private Path dumpRoot; + @JsonIgnore + private boolean initialized = false; + + public DumpMetaData() { + //to be instantiated by JSON ObjectMapper. + hiveConf = null; + } public DumpMetaData(Path dumpRoot, HiveConf hiveConf) { - this.hiveConf = hiveConf; - dumpFile = new Path(dumpRoot, DUMP_METADATA); + this(dumpRoot, hiveConf, false); } + public DumpMetaData(Path dumpRoot, HiveConf hiveConf, boolean isTopLevel) { Review Comment: What does isTopLevel mean? ########## ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/DumpMetaData.java: ########## @@ -117,6 +153,32 @@ private void readReplScope(String line) throws IOException { } private void loadDumpFromFile() throws SemanticException { + boolean isInJSONFormat = resolveDumpFilePathAndGetIfV2(); + if (isInJSONFormat) { + loadDumpFromFileV2(); + } else { + loadDumpFromFileV1(); + } + } + + //Returns true if dumpmetaData is in V2 Format + private boolean resolveDumpFilePathAndGetIfV2() throws SemanticException { + if (isTopLevel) { + dumpFile = new Path(dumpRoot, DUMP_METADATA_V2); + if (Utils.fileExists(dumpFile, hiveConf)) { + return true; + } + //Backward-compatibility: fall back to old version. 
Dump might be generated by old version + dumpFile = new Path(dumpRoot, DUMP_METADATA); + LOG.info("Falling back to old version of dump meta data {}", dumpFile); + } else { + // The nested level _dumpmetadata file content is still in old format: To save JSON parsing cost. + dumpFile = new Path(dumpRoot, DUMP_METADATA); + } + return false; + } + + private void loadDumpFromFileV1() throws SemanticException { BufferedReader br = null; try { Review Comment: Are there any tests to verify this path? Issue Time Tracking ------------------- Worklog Id: (was: 774350) Time Spent: 1h (was: 50m) > Move top level dump metadata content to be in JSON format > --------------------------------------------------------- > > Key: HIVE-24884 > URL: https://issues.apache.org/jira/browse/HIVE-24884 > Project: Hive > Issue Type: Task > Reporter: Pravin Sinha > Assignee: Pravin Sinha > Priority: Major > Labels: pull-request-available > Time Spent: 1h > Remaining Estimate: 0h > > {color:#172b4d}The current content for _dumpmetadata file is TAB separated. > This is not very flexible for extension. A more flexible format like JSON > based content would be helpful for extending the content.{color} -- This message was sent by Atlassian Jira (v8.20.7#820007)