[ https://issues.apache.org/jira/browse/HIVE-23887?focusedWorklogId=471515&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-471515 ]
ASF GitHub Bot logged work on HIVE-23887: ----------------------------------------- Author: ASF GitHub Bot Created on: 17/Aug/20 14:41 Start Date: 17/Aug/20 14:41 Worklog Time Spent: 10m Work Description: sankarh commented on a change in pull request #1370: URL: https://github.com/apache/hive/pull/1370#discussion_r471511349 ########## File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java ########## @@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception { TestTxnCommands2.stringifyValues(data), rs); } -} + + Review comment: Nit: Remove extra blank line. ########## File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ########## @@ -3185,7 +3185,7 @@ public void loadTable(Path loadPath, String tableName, LoadFileType loadFileType //column stats will be inaccurate if (resetStatistics) { LOG.debug("Clearing table statistics for " + tbl.getDbName() + "." + tbl.getTableName()); - StatsSetupConst.clearColumnStatsState(tbl.getParameters()); + StatsSetupConst.setBasicStatsState(tbl.getParameters(),StatsSetupConst.FALSE); Review comment: Even during loadPartition, we need to reset stats in table properties as well which is missing now. ########## File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java ########## @@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception { TestTxnCommands2.stringifyValues(data), rs); } -} + + + @Test public void testImportOrc() throws Exception { + + runStatementOnDriver("drop table if exists T"); + runStatementOnDriver("drop table if exists Tstage"); + runStatementOnDriver("create table T (a int, b int) stored" + + " as orc tblproperties('transactional'='true')"); + //Tstage is the target table Review comment: Nit: Add single blank line before a comment line. Check below places too. ########## File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ########## @@ -3185,7 +3185,7 @@ public void loadTable(Path loadPath, String tableName, LoadFileType loadFileType //column stats will be inaccurate if (resetStatistics) { LOG.debug("Clearing table statistics for " + tbl.getDbName() + "." + tbl.getTableName()); - StatsSetupConst.clearColumnStatsState(tbl.getParameters()); + StatsSetupConst.setBasicStatsState(tbl.getParameters(),StatsSetupConst.FALSE); Review comment: Can be combined with previous if block which also does the same thing. Also, the comment says "column stats will be inaccurate" but we are resetting both basic and column stats. Correct the log message and comment accordingly. ########## File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java ########## @@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception { TestTxnCommands2.stringifyValues(data), rs); } -} + + + @Test public void testImportOrc() throws Exception { + + runStatementOnDriver("drop table if exists T"); + runStatementOnDriver("drop table if exists Tstage"); + runStatementOnDriver("create table T (a int, b int) stored" + + " as orc tblproperties('transactional'='true')"); + //Tstage is the target table + runStatementOnDriver("create table Tstage (a int, b int) stored" + + " as orc tblproperties('transactional'='true')"); + //this creates an ORC data file with correct schema under table root + runStatementOnDriver("insert into Tstage values(1,2),(3,4),(5,6)"); + final int[][] rows = { { 3 } }; Review comment: Can we check if stats are set in source table Tstage? ########## File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ########## @@ -3185,7 +3185,7 @@ public void loadTable(Path loadPath, String tableName, LoadFileType loadFileType //column stats will be inaccurate if (resetStatistics) { LOG.debug("Clearing table statistics for " + tbl.getDbName() + "." + tbl.getTableName()); - StatsSetupConst.clearColumnStatsState(tbl.getParameters()); + StatsSetupConst.setBasicStatsState(tbl.getParameters(),StatsSetupConst.FALSE); Review comment: Nit: Need a space after comma ",". ########## File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java ########## @@ -404,6 +404,21 @@ public void testImportPartitionedOrc() throws Exception { //load T runStatementOnDriver("import table T from '" + getWarehouseDir() + "/1'"); + + //check basic stats in tblproperties + List<String> rsProperties = runStatementOnDriver("show tblproperties T"); + Assert + .assertEquals("COLUMN_STATS_ACCURATE of imported table", rsProperties.contains("COLUMN_STATS_ACCURATE"), false); + + //check basic stats in partition properties + List<String> rsPartitions = runStatementOnDriver("show partitions T"); + for(String rsPartition : rsPartitions) { + List<String> rsPartitionProperties = runStatementOnDriver("describe formatted T partition("+ rsPartition +")"); Review comment: Nit: Add space before and after "+". ########## File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java ########## @@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception { TestTxnCommands2.stringifyValues(data), rs); } -} + + + @Test public void testImportOrc() throws Exception { Review comment: Nit: The method definition and annotation can be in separate lines. ########## File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java ########## @@ -404,6 +404,21 @@ public void testImportPartitionedOrc() throws Exception { //load T runStatementOnDriver("import table T from '" + getWarehouseDir() + "/1'"); + + //check basic stats in tblproperties + List<String> rsProperties = runStatementOnDriver("show tblproperties T"); + Assert + .assertEquals("COLUMN_STATS_ACCURATE of imported table", rsProperties.contains("COLUMN_STATS_ACCURATE"), false); + + //check basic stats in partition properties + List<String> rsPartitions = runStatementOnDriver("show partitions T"); + for(String rsPartition : rsPartitions) { Review comment: Nit: Add space before "(" ########## File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java ########## @@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception { TestTxnCommands2.stringifyValues(data), rs); } -} + + + @Test public void testImportOrc() throws Exception { + Review comment: Nit: Remove this blank line. ########## File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java ########## @@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception { TestTxnCommands2.stringifyValues(data), rs); } -} + + + @Test public void testImportOrc() throws Exception { + + runStatementOnDriver("drop table if exists T"); + runStatementOnDriver("drop table if exists Tstage"); + runStatementOnDriver("create table T (a int, b int) stored" + + " as orc tblproperties('transactional'='true')"); + //Tstage is the target table + runStatementOnDriver("create table Tstage (a int, b int) stored" + + " as orc tblproperties('transactional'='true')"); + //this creates an ORC data file with correct schema under table root + runStatementOnDriver("insert into Tstage values(1,2),(3,4),(5,6)"); + final int[][] rows = { { 3 } }; + //now we have an archive with 3 partitions Review comment: The comment seems incorrect. This is non-partitioned table. ########## File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java ########## @@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception { TestTxnCommands2.stringifyValues(data), rs); } -} + + + @Test public void testImportOrc() throws Exception { + + runStatementOnDriver("drop table if exists T"); + runStatementOnDriver("drop table if exists Tstage"); + runStatementOnDriver("create table T (a int, b int) stored" + + " as orc tblproperties('transactional'='true')"); + //Tstage is the target table Review comment: Comment incorrect. Tstage is source table. ########## File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java ########## @@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception { TestTxnCommands2.stringifyValues(data), rs); } -} + + + @Test public void testImportOrc() throws Exception { Review comment: Can we also check if import sets stats to true in case of autostats_gather is set to true? For both partitioned and non-partitioned tables. ########## File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java ########## @@ -404,6 +404,21 @@ public void testImportPartitionedOrc() throws Exception { //load T runStatementOnDriver("import table T from '" + getWarehouseDir() + "/1'"); + + //check basic stats in tblproperties + List<String> rsProperties = runStatementOnDriver("show tblproperties T"); Review comment: Can we check if stats are set in source table Tstage and their partitions? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 471515) Time Spent: 50m (was: 40m) > Reset Columns stats in Export Statement > --------------------------------------- > > Key: HIVE-23887 > URL: https://issues.apache.org/jira/browse/HIVE-23887 > Project: Hive > Issue Type: Bug > Components: Import/Export, Statistics > Reporter: Ashish Sharma > Assignee: Ashish Sharma > Priority: Minor > Labels: pull-request-available > Time Spent: 50m > Remaining Estimate: 0h > > While doing "export table db.table to '/import/table' " entire column stat is > getting exported which lead to columns stats corruption in import table. > Reset columns stats while export to force Imported to recalculate the Columns > stats -- This message was sent by Atlassian Jira (v8.3.4#803005)