[ 
https://issues.apache.org/jira/browse/HIVE-23887?focusedWorklogId=471515&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-471515
 ]

ASF GitHub Bot logged work on HIVE-23887:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 17/Aug/20 14:41
            Start Date: 17/Aug/20 14:41
    Worklog Time Spent: 10m 
      Work Description: sankarh commented on a change in pull request #1370:
URL: https://github.com/apache/hive/pull/1370#discussion_r471511349



##########
File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java
##########
@@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception {
         TestTxnCommands2.stringifyValues(data), rs);
 
   }
-}
+
+

Review comment:
       Nit: Remove extra blank line.

##########
File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
##########
@@ -3185,7 +3185,7 @@ public void loadTable(Path loadPath, String tableName, 
LoadFileType loadFileType
     //column stats will be inaccurate
     if (resetStatistics) {
       LOG.debug("Clearing table statistics for " + tbl.getDbName() + "." + 
tbl.getTableName());
-      StatsSetupConst.clearColumnStatsState(tbl.getParameters());
+      
StatsSetupConst.setBasicStatsState(tbl.getParameters(),StatsSetupConst.FALSE);

Review comment:
       Even during loadPartition, we need to reset stats in table properties as 
well which is missing now.

##########
File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java
##########
@@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception {
         TestTxnCommands2.stringifyValues(data), rs);
 
   }
-}
+
+
+  @Test public void testImportOrc() throws Exception {
+
+    runStatementOnDriver("drop table if exists T");
+    runStatementOnDriver("drop table if exists Tstage");
+    runStatementOnDriver("create table T (a int, b int) stored"
+        + " as orc tblproperties('transactional'='true')");
+    //Tstage is the target table

Review comment:
       Nit: Add single blank line before a comment line. Check below places too.

##########
File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
##########
@@ -3185,7 +3185,7 @@ public void loadTable(Path loadPath, String tableName, 
LoadFileType loadFileType
     //column stats will be inaccurate
     if (resetStatistics) {
       LOG.debug("Clearing table statistics for " + tbl.getDbName() + "." + 
tbl.getTableName());
-      StatsSetupConst.clearColumnStatsState(tbl.getParameters());
+      
StatsSetupConst.setBasicStatsState(tbl.getParameters(),StatsSetupConst.FALSE);

Review comment:
       Can be combined with previous if block which also does the same thing. 
Also, the comment says "column stats will be inaccurate" but we are resetting 
both basic and column stats. Correct the log message and comment accordingly.

##########
File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java
##########
@@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception {
         TestTxnCommands2.stringifyValues(data), rs);
 
   }
-}
+
+
+  @Test public void testImportOrc() throws Exception {
+
+    runStatementOnDriver("drop table if exists T");
+    runStatementOnDriver("drop table if exists Tstage");
+    runStatementOnDriver("create table T (a int, b int) stored"
+        + " as orc tblproperties('transactional'='true')");
+    //Tstage is the target table
+    runStatementOnDriver("create table Tstage (a int, b int) stored"
+        + " as orc tblproperties('transactional'='true')");
+    //this creates an ORC data file with correct schema under table root
+    runStatementOnDriver("insert into Tstage values(1,2),(3,4),(5,6)");
+    final int[][] rows = { { 3 } };

Review comment:
       Can we check if stats are set in source table Tstage?

##########
File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
##########
@@ -3185,7 +3185,7 @@ public void loadTable(Path loadPath, String tableName, 
LoadFileType loadFileType
     //column stats will be inaccurate
     if (resetStatistics) {
       LOG.debug("Clearing table statistics for " + tbl.getDbName() + "." + 
tbl.getTableName());
-      StatsSetupConst.clearColumnStatsState(tbl.getParameters());
+      
StatsSetupConst.setBasicStatsState(tbl.getParameters(),StatsSetupConst.FALSE);

Review comment:
       Nit: Need a space after comma ",".

##########
File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java
##########
@@ -404,6 +404,21 @@ public void testImportPartitionedOrc() throws Exception {
 
     //load T
     runStatementOnDriver("import table T from '" + getWarehouseDir() + "/1'");
+
+    //check basic stats in tblproperties
+    List<String> rsProperties = runStatementOnDriver("show tblproperties T");
+    Assert
+        .assertEquals("COLUMN_STATS_ACCURATE of imported table", 
rsProperties.contains("COLUMN_STATS_ACCURATE"), false);
+
+    //check basic stats in partition properties
+    List<String> rsPartitions = runStatementOnDriver("show partitions T");
+    for(String rsPartition : rsPartitions) {
+      List<String> rsPartitionProperties = runStatementOnDriver("describe 
formatted T partition("+ rsPartition +")");

Review comment:
       Nit: Add space before and after "+".

##########
File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java
##########
@@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception {
         TestTxnCommands2.stringifyValues(data), rs);
 
   }
-}
+
+
+  @Test public void testImportOrc() throws Exception {

Review comment:
       Nit: The method definition and annotation can be in separate lines.

##########
File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java
##########
@@ -404,6 +404,21 @@ public void testImportPartitionedOrc() throws Exception {
 
     //load T
     runStatementOnDriver("import table T from '" + getWarehouseDir() + "/1'");
+
+    //check basic stats in tblproperties
+    List<String> rsProperties = runStatementOnDriver("show tblproperties T");
+    Assert
+        .assertEquals("COLUMN_STATS_ACCURATE of imported table", 
rsProperties.contains("COLUMN_STATS_ACCURATE"), false);
+
+    //check basic stats in partition properties
+    List<String> rsPartitions = runStatementOnDriver("show partitions T");
+    for(String rsPartition : rsPartitions) {

Review comment:
       Nit: Add space before "("

##########
File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java
##########
@@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception {
         TestTxnCommands2.stringifyValues(data), rs);
 
   }
-}
+
+
+  @Test public void testImportOrc() throws Exception {
+

Review comment:
       Nit: Remove this blank line.

##########
File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java
##########
@@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception {
         TestTxnCommands2.stringifyValues(data), rs);
 
   }
-}
+
+
+  @Test public void testImportOrc() throws Exception {
+
+    runStatementOnDriver("drop table if exists T");
+    runStatementOnDriver("drop table if exists Tstage");
+    runStatementOnDriver("create table T (a int, b int) stored"
+        + " as orc tblproperties('transactional'='true')");
+    //Tstage is the target table
+    runStatementOnDriver("create table Tstage (a int, b int) stored"
+        + " as orc tblproperties('transactional'='true')");
+    //this creates an ORC data file with correct schema under table root
+    runStatementOnDriver("insert into Tstage values(1,2),(3,4),(5,6)");
+    final int[][] rows = { { 3 } };
+    //now we have an archive with 3 partitions

Review comment:
       The comment seems incorrect. This is non-partitioned table.

##########
File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java
##########
@@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception {
         TestTxnCommands2.stringifyValues(data), rs);
 
   }
-}
+
+
+  @Test public void testImportOrc() throws Exception {
+
+    runStatementOnDriver("drop table if exists T");
+    runStatementOnDriver("drop table if exists Tstage");
+    runStatementOnDriver("create table T (a int, b int) stored"
+        + " as orc tblproperties('transactional'='true')");
+    //Tstage is the target table

Review comment:
       Comment incorrect. Tstage is source table.

##########
File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java
##########
@@ -566,4 +581,33 @@ public void testMMExportAborted() throws Exception {
         TestTxnCommands2.stringifyValues(data), rs);
 
   }
-}
+
+
+  @Test public void testImportOrc() throws Exception {

Review comment:
       Can we also check if import sets stats to true in case of 
autostats_gather is set to true? For both partitioned and non-partitioned 
tables.

##########
File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java
##########
@@ -404,6 +404,21 @@ public void testImportPartitionedOrc() throws Exception {
 
     //load T
     runStatementOnDriver("import table T from '" + getWarehouseDir() + "/1'");
+
+    //check basic stats in tblproperties
+    List<String> rsProperties = runStatementOnDriver("show tblproperties T");

Review comment:
       Can we check if stats are set in source table Tstage and their 
partitions? 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 471515)
    Time Spent: 50m  (was: 40m)

> Reset Columns stats in Export Statement
> ---------------------------------------
>
>                 Key: HIVE-23887
>                 URL: https://issues.apache.org/jira/browse/HIVE-23887
>             Project: Hive
>          Issue Type: Bug
>          Components: Import/Export, Statistics
>            Reporter: Ashish Sharma
>            Assignee: Ashish Sharma
>            Priority: Minor
>              Labels: pull-request-available
>          Time Spent: 50m
>  Remaining Estimate: 0h
>
> While doing "export table db.table to '/import/table' " entire column stat is 
> getting exported which lead to columns stats corruption in import table.
> Reset columns stats while export to force Imported to recalculate the Columns 
> stats



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to