[ 
https://issues.apache.org/jira/browse/HIVE-21960?focusedWorklogId=284357&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-284357
 ]

ASF GitHub Bot logged work on HIVE-21960:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 29/Jul/19 15:49
            Start Date: 29/Jul/19 15:49
    Worklog Time Spent: 10m 
      Work Description: ashutosh-bapat commented on pull request #735: 
HIVE-21960 : Avoid running stats updater and partition management task on a 
replicated table.
URL: https://github.com/apache/hive/pull/735#discussion_r308304082
 
 

 ##########
 File path: 
standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java
 ##########
 @@ -563,6 +564,94 @@ public void testPartitionDiscoverySkipInvalidPath() 
throws TException, IOExcepti
     assertEquals(4, partitions.size());
   }
 
+  @Test
+  public void testNoPartitionDiscoveryForReplTable() throws Exception {
+    String dbName = "db_repl1";
+    String tableName = "tbl_repl1";
+    Map<String, Column> colMap = buildAllColumns();
+    List<String> partKeys = Lists.newArrayList("state", "dt");
+    List<String> partKeyTypes = Lists.newArrayList("string", "date");
+    List<List<String>> partVals = Lists.newArrayList(
+            Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "1990-01-01"),
+            Lists.newArrayList("CA", "1986-04-28"),
+            Lists.newArrayList("MN", "2018-11-31"));
+    createMetadata(DEFAULT_CATALOG_NAME, dbName, tableName, partKeys, 
partKeyTypes, partVals, colMap, false);
+    Table table = client.getTable(dbName, tableName);
+    List<Partition> partitions = client.listPartitions(dbName, tableName, 
(short) -1);
+    assertEquals(3, partitions.size());
+    String tableLocation = table.getSd().getLocation();
+    URI location = URI.create(tableLocation);
+    Path tablePath = new Path(location);
+    FileSystem fs = FileSystem.get(location, conf);
+    Path newPart1 = new Path(tablePath, "state=WA/dt=2018-12-01");
+    Path newPart2 = new Path(tablePath, "state=UT/dt=2018-12-02");
+    fs.mkdirs(newPart1);
+    fs.mkdirs(newPart2);
+    assertEquals(5, fs.listStatus(tablePath).length);
+    partitions = client.listPartitions(dbName, tableName, (short) -1);
+    assertEquals(3, partitions.size());
+
+    // table property is set to true, but the table is marked as replication 
target. The new
+    // partitions should not be created
+    
table.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY,
 "true");
+    table.getParameters().put(ReplConst.REPL_TARGET_PROPERTY, "1");
+    client.alter_table(dbName, tableName, table);
+    runPartitionManagementTask(conf);
+    partitions = client.listPartitions(dbName, tableName, (short) -1);
+    assertEquals(3, partitions.size());
+
+    // change table type to external, delete a partition directory and make 
sure partition discovery works
+    table.getParameters().put("EXTERNAL", "true");
+    table.setTableType(TableType.EXTERNAL_TABLE.name());
+    client.alter_table(dbName, tableName, table);
+    // Delete location of one of the partitions. The partition discovery task 
should not drop
+    // that partition.
+    boolean deleted = fs.delete((new 
Path(URI.create(partitions.get(0).getSd().getLocation()))).getParent(),
+                    true);
+    assertTrue(deleted);
+    assertEquals(4, fs.listStatus(tablePath).length);
+    runPartitionManagementTask(conf);
+    partitions = client.listPartitions(dbName, tableName, (short) -1);
+    assertEquals(3, partitions.size());
+  }
+
+  @Test
+  public void testNoPartitionRetentionForReplTarget() throws TException, 
InterruptedException {
+    String dbName = "db_repl2";
+    String tableName = "tbl_repl2";
+    Map<String, Column> colMap = buildAllColumns();
+    List<String> partKeys = Lists.newArrayList("state", "dt");
+    List<String> partKeyTypes = Lists.newArrayList("string", "date");
+    List<List<String>> partVals = Lists.newArrayList(
+            Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "1990-01-01"),
+            Lists.newArrayList("CA", "1986-04-28"),
+            Lists.newArrayList("MN", "2018-11-31"));
+    // Check for the existence of partitions 10 seconds after the partition 
retention period has
+    // elapsed. Gives enough time for the partition retention task to work.
+    long partitionRetentionPeriodMs = 20000;
+    long waitingPeriodForTest = partitionRetentionPeriodMs + 10 * 1000;
+    createMetadata(DEFAULT_CATALOG_NAME, dbName, tableName, partKeys, 
partKeyTypes, partVals, colMap, false);
+    Table table = client.getTable(dbName, tableName);
+    List<Partition> partitions = client.listPartitions(dbName, tableName, 
(short) -1);
+    assertEquals(3, partitions.size());
+
+    
table.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY,
 "true");
+    
table.getParameters().put(PartitionManagementTask.PARTITION_RETENTION_PERIOD_TBLPROPERTY,
+            partitionRetentionPeriodMs + "ms");
+    table.getParameters().put(ReplConst.REPL_TARGET_PROPERTY, "1");
+    client.alter_table(dbName, tableName, table);
+
+    runPartitionManagementTask(conf);
+    partitions = client.listPartitions(dbName, tableName, (short) -1);
+    assertEquals(3, partitions.size());
+
+    // after 30s all partitions should remain intact for a table which is 
the target of replication.
+    Thread.sleep(waitingPeriodForTest);
+    runPartitionManagementTask(conf);
+    partitions = client.listPartitions(dbName, tableName, (short) -1);
 
 Review comment:
   testPartitionRetention() does that already. Do you want some other scenario 
to be tested?
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 284357)
    Time Spent: 1h  (was: 50m)

> HMS tasks on replica
> --------------------
>
>                 Key: HIVE-21960
>                 URL: https://issues.apache.org/jira/browse/HIVE-21960
>             Project: Hive
>          Issue Type: Improvement
>          Components: HiveServer2, repl
>    Affects Versions: 4.0.0
>            Reporter: Ashutosh Bapat
>            Assignee: Ashutosh Bapat
>            Priority: Major
>              Labels: pull-request-available
>         Attachments: HIVE-21960.01.patch, HIVE-21960.02.patch, 
> HIVE-21960.03.patch, Replication and House keeping tasks.pdf
>
>          Time Spent: 1h
>  Remaining Estimate: 0h
>
> An HMS performs a number of housekeeping tasks. Assess:
>  # Whether they need to be performed on the replicated data
>  # Whether performing them on replicated data causes any issues and, if so, how to fix those.



--
This message was sent by Atlassian JIRA
(v7.6.14#76016)

Reply via email to