[ https://issues.apache.org/jira/browse/HIVE-25441?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Denys Kuzmenko resolved HIVE-25441. ----------------------------------- Resolution: Fixed > Incorrect deltas split for sub-compactions when using > `hive.compactor.max.num.delta` > ------------------------------------------------------------------------------------ > > Key: HIVE-25441 > URL: https://issues.apache.org/jira/browse/HIVE-25441 > Project: Hive > Issue Type: Task > Reporter: Denys Kuzmenko > Priority: Major > Labels: pull-request-available > Time Spent: 2h > Remaining Estimate: 0h > > {code} > #Repro steps: > #1./ set hive.compactor.max.num.delta to 5 on HMS > #2./ Set up the table > set hive.merge.cardinality.check=false; > create table test (k int); > ALTER TABLE test SET TBLPROPERTIES ('NO_AUTO_COMPACTION'='true'); > insert into test values (1); > alter table test compact 'major' and wait; > dfs -ls '/warehouse/tablespace/managed/hive/test'; > # drwxrwx---+ - hive hive 0 2021-08-09 12:26 > /warehouse/tablespace/managed/hive/test/base_0000008_v0000416 > select * from test; > # k=1 > #run 3 times so there's enough delta dirs, ie. 
6 (should just increase k by 1) > #basically just removes the row and adds a new row with k+1 value > MERGE INTO test AS T USING (select * from test union all select k+1 from > test) AS S > ON T.k=s.k > WHEN MATCHED THEN DELETE > WHEN not MATCHED THEN INSERT values (s.k); > select * from test; > #k=4 > dfs -ls '/warehouse/tablespace/managed/hive/test'; > #drwxrwx---+ - hive hive 0 2021-08-09 12:26 > /warehouse/tablespace/managed/hive/test/base_0000008_v0000416 > #drwxrwx---+ - hive hive 0 2021-08-09 12:28 > /warehouse/tablespace/managed/hive/test/delete_delta_0000009_0000009_0001 > #drwxrwx---+ - hive hive 0 2021-08-09 12:29 > /warehouse/tablespace/managed/hive/test/delete_delta_0000010_0000010_0001 > #drwxrwx---+ - hive hive 0 2021-08-09 12:29 > /warehouse/tablespace/managed/hive/test/delete_delta_0000011_0000011_0001 > #drwxrwx---+ - hive hive 0 2021-08-09 12:28 > /warehouse/tablespace/managed/hive/test/delta_0000009_0000009_0003 > #drwxrwx---+ - hive hive 0 2021-08-09 12:29 > /warehouse/tablespace/managed/hive/test/delta_0000010_0000010_0003 > #drwxrwx---+ - hive hive 0 2021-08-09 12:29 > /warehouse/tablespace/managed/hive/test/delta_0000011_0000011_0003 > alter table test compact 'major' and wait; > select * from test; > #result is empty > dfs -ls '/warehouse/tablespace/managed/hive/test'; > #2 drwxrwx---+ - hive hive 0 2021-08-09 12:31 > /warehouse/tablespace/managed/hive/test/base_0000011_v0000428 > {code} > Some logs from the above example: > {code} > 2021-08-09 12:30:37,532 WARN > org.apache.hadoop.hive.ql.txn.compactor.CompactorMR: > [nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49_executor]: 6 delta files > found for default.test located at > hdfs://nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site:8020/warehouse/tablespace/managed/hive/test! > This is likely a sign of misconfiguration, especially if this message > repeats. Check that compaction is running properly. 
Check for any > runaway/mis-configured process writing to ACID tables, especially using > Streaming Ingest API. > 2021-08-09 12:30:37,533 INFO > org.apache.hadoop.hive.ql.txn.compactor.CompactorMR: > [nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49_executor]: Submitting > MINOR compaction job > 'nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49-compactor-default.test_0' > to default queue. (current delta dirs count=5, obsolete delta dirs count=-1. > TxnIdRange[9,11] > 2021-08-09 12:30:38,003 INFO > org.apache.hadoop.hive.ql.txn.compactor.CompactorMR: > [nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49_executor]: Submitted > compaction job > 'nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49-compactor-default.test_0' > with jobID=job_1628497133224_0051 compaction ID=23 > #From app logs of the minor compaction, note that delta_0000011_0000011_0003 > is missing from the list > 2021-08-09 12:30:47,399 INFO [main] org.apache.hadoop.mapred.MapTask: > Processing split: CompactorInputSplit{base: null, bucket: 0, length: 3231, > deltas: [delete_delta_0000009_0000009_0001, delta_0000009_0000009_0003, > delete_delta_0000010_0000010_0001, delta_0000010_0000010_0003, > delete_delta_0000011_0000011_0001]} > 2021-08-09 12:30:53,061 INFO > org.apache.hadoop.hive.ql.txn.compactor.CompactorMR: > [nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49_executor]: Submitting > MAJOR compaction job > 'nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49-compactor-default.test' > to default queue. (current delta dirs count=2, obsolete delta dirs count=6. 
> TxnIdRange[9,11] > 2021-08-09 12:30:53,501 INFO > org.apache.hadoop.hive.ql.txn.compactor.CompactorMR: > [nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49_executor]: Submitted > compaction job > 'nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site-49-compactor-default.test' > with jobID=job_1628497133224_0052 compaction ID=23 > 2021-08-09 12:31:03,493 INFO [main] org.apache.hadoop.mapred.MapTask: > Processing split: CompactorInputSplit{base: > hdfs://nightly-7x-us-2-2.nightly-7x-us-2.root.hwx.site:8020/warehouse/tablespace/managed/hive/test/base_0000008_v0000416, > bucket: 0, length: 1697, deltas: [delete_delta_0000009_0000011_v0000428, > delta_0000009_0000011_v0000428]} > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)