[ https://issues.apache.org/jira/browse/HUDI-1286?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Nishith Agarwal updated HUDI-1286: ---------------------------------- Description: When running the following query -> {code:java} select count(*) from testdb.table1_rt {code} we see the following exception in hiveserver : {code:java} 2020-09-16T03:41:07,668 INFO LocalJobRunner Map Task Executor #0: realtime.AbstractRealtimeRecordReader (AbstractRealtimeRecordReader.java:init(88)) - Writer Schema From Parquet => [_hoodie_commit_time type:UNION pos:0, _hoodie_commit_seqno type:UNION pos:1, _hoodie_record_key type:UNION pos:2, _hoodie_partition_path type:UNION pos:3, _hoodie_file_name type:UNION pos:4, timestamp type:LONG pos:5, _row_key type:STRING pos:6, rider type:STRING pos:7, driver type:STRING pos:8, begin_lat type:DOUBLE pos:9, begin_lon type:DOUBLE pos:10, end_lat type:DOUBLE pos:11, end_lon type:DOUBLE pos:12, fare type:DOUBLE pos:13]2020-09-16T03:41:07,668 INFO LocalJobRunner Map Task Executor #0: realtime.AbstractRealtimeRecordReader (AbstractRealtimeRecordReader.java:init(88)) - Writer Schema From Parquet => [_hoodie_commit_time type:UNION pos:0, _hoodie_commit_seqno type:UNION pos:1, _hoodie_record_key type:UNION pos:2, _hoodie_partition_path type:UNION pos:3, _hoodie_file_name type:UNION pos:4, timestamp type:LONG pos:5, _row_key type:STRING pos:6, rider type:STRING pos:7, driver type:STRING pos:8, begin_lat type:DOUBLE pos:9, begin_lon type:DOUBLE pos:10, end_lat type:DOUBLE pos:11, end_lon type:DOUBLE pos:12, fare type:DOUBLE pos:13]2020-09-16T03:41:07,670 INFO [Thread-465]: mapred.LocalJobRunner (LocalJobRunner.java:runTasks(483)) - map task executor complete.2020-09-16T03:41:07,671 WARN [Thread-465]: mapred.LocalJobRunner (LocalJobRunner.java:run(587)) - job_local242522391_0010java.lang.Exception: java.io.IOException: org.apache.hudi.exception.HoodieException: Error ordering fields for storage read. 
#fieldNames: 4, #fieldPositions: 5 at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:489) ~[hadoop-mapreduce-client-common-2.8.4.jar:?] at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:549) ~[hadoop-mapreduce-client-common-2.8.4.jar:?]Caused by: java.io.IOException: org.apache.hudi.exception.HoodieException: Error ordering fields for storage read. #fieldNames: 4, #fieldPositions: 5 at org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:379) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:169) ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:432) ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:270) ~[hadoop-mapreduce-client-common-2.8.4.jar:?] at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_212] at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_212] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_212] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_212] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_212]Caused by: org.apache.hudi.exception.HoodieException: Error ordering fields for storage read. 
#fieldNames: 4, #fieldPositions: 5 at org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.orderFields(HoodieRealtimeRecordReaderUtils.java:258) ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at org.apache.hudi.hadoop.realtime.AbstractRealtimeRecordReader.init(AbstractRealtimeRecordReader.java:99) ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at org.apache.hudi.hadoop.realtime.AbstractRealtimeRecordReader.<init>(AbstractRealtimeRecordReader.java:67) ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at org.apache.hudi.hadoop.realtime.RealtimeCompactedRecordReader.<init>(RealtimeCompactedRecordReader.java:53) ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.constructRecordReader(HoodieRealtimeRecordReader.java:70) ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.<init>(HoodieRealtimeRecordReader.java:47) ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat.getRecordReader(HoodieParquetRealtimeInputFormat.java:120) ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:376) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:169) ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:432) ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:270) ~[hadoop-mapreduce-client-common-2.8.4.jar:?] 
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_212] at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_212] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_212] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_212] at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_212]{code} This issue was seen during making HiveCombineInputFormat work with real time tables, we are seeing this issue again. was: When running the following query -> {code:java} select count(*) from testdb.table1_rt {code} we see the following exception in hiveserver : {code:java} 2020-09-16T03:41:07,668 INFO LocalJobRunner Map Task Executor #0: realtime.AbstractRealtimeRecordReader (AbstractRealtimeRecordReader.java:init(88)) - Writer Schema From Parquet => [_hoodie_commit_time type:UNION pos:0, _hoodie_commit_seqno type:UNION pos:1, _hoodie_record_key type:UNION pos:2, _hoodie_partition_path type:UNION pos:3, _hoodie_file_name type:UNION pos:4, timestamp type:LONG pos:5, _row_key type:STRING pos:6, rider type:STRING pos:7, driver type:STRING pos:8, begin_lat type:DOUBLE pos:9, begin_lon type:DOUBLE pos:10, end_lat type:DOUBLE pos:11, end_lon type:DOUBLE pos:12, fare type:DOUBLE pos:13]2020-09-16T03:41:07,668 INFO LocalJobRunner Map Task Executor #0: realtime.AbstractRealtimeRecordReader (AbstractRealtimeRecordReader.java:init(88)) - Writer Schema From Parquet => [_hoodie_commit_time type:UNION pos:0, _hoodie_commit_seqno type:UNION pos:1, _hoodie_record_key type:UNION pos:2, _hoodie_partition_path type:UNION pos:3, _hoodie_file_name type:UNION pos:4, timestamp type:LONG pos:5, _row_key type:STRING pos:6, rider type:STRING pos:7, driver type:STRING pos:8, begin_lat type:DOUBLE pos:9, begin_lon type:DOUBLE pos:10, end_lat type:DOUBLE pos:11, end_lon type:DOUBLE pos:12, fare type:DOUBLE pos:13]2020-09-16T03:41:07,670 INFO [Thread-465]: mapred.LocalJobRunner 
(LocalJobRunner.java:runTasks(483)) - map task executor complete.2020-09-16T03:41:07,671 WARN [Thread-465]: mapred.LocalJobRunner (LocalJobRunner.java:run(587)) - job_local242522391_0010java.lang.Exception: java.io.IOException: org.apache.hudi.exception.HoodieException: Error ordering fields for storage read. #fieldNames: 4, #fieldPositions: 5 at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:489) ~[hadoop-mapreduce-client-common-2.8.4.jar:?] at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:549) ~[hadoop-mapreduce-client-common-2.8.4.jar:?]Caused by: java.io.IOException: org.apache.hudi.exception.HoodieException: Error ordering fields for storage read. #fieldNames: 4, #fieldPositions: 5 at org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:379) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:169) ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:432) ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:270) ~[hadoop-mapreduce-client-common-2.8.4.jar:?] 
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_212] at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_212] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_212] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_212] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_212]Caused by: org.apache.hudi.exception.HoodieException: Error ordering fields for storage read. #fieldNames: 4, #fieldPositions: 5 at org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.orderFields(HoodieRealtimeRecordReaderUtils.java:258) ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at org.apache.hudi.hadoop.realtime.AbstractRealtimeRecordReader.init(AbstractRealtimeRecordReader.java:99) ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at org.apache.hudi.hadoop.realtime.AbstractRealtimeRecordReader.<init>(AbstractRealtimeRecordReader.java:67) ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at org.apache.hudi.hadoop.realtime.RealtimeCompactedRecordReader.<init>(RealtimeCompactedRecordReader.java:53) ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.constructRecordReader(HoodieRealtimeRecordReader.java:70) ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.<init>(HoodieRealtimeRecordReader.java:47) ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat.getRecordReader(HoodieParquetRealtimeInputFormat.java:120) ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:376) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:169) ~[hadoop-mapreduce-client-core-2.8.4.jar:?] 
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:432) ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:270) ~[hadoop-mapreduce-client-common-2.8.4.jar:?] at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_212] at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_212] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_212] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_212] at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_212]{code} This issue was seen during making HiveCombineInputFormat work with real time tables, we are seeing this issue again. > Merge On Read queries (_rt) fails on docker demo for test suite > --------------------------------------------------------------- > > Key: HUDI-1286 > URL: https://issues.apache.org/jira/browse/HUDI-1286 > Project: Apache Hudi > Issue Type: Bug > Components: Testing > Reporter: Nishith Agarwal > Assignee: Nishith Agarwal > Priority: Major > Fix For: 0.6.1 > > > When running the following query -> > {code:java} > select count(*) from testdb.table1_rt > {code} > we see the following exception in hiveserver : > {code:java} > 2020-09-16T03:41:07,668 INFO LocalJobRunner Map Task Executor #0: > realtime.AbstractRealtimeRecordReader > (AbstractRealtimeRecordReader.java:init(88)) - Writer Schema From Parquet => > [_hoodie_commit_time type:UNION pos:0, _hoodie_commit_seqno type:UNION pos:1, > _hoodie_record_key type:UNION pos:2, _hoodie_partition_path type:UNION pos:3, > _hoodie_file_name type:UNION pos:4, timestamp type:LONG pos:5, _row_key > type:STRING pos:6, rider type:STRING pos:7, driver type:STRING pos:8, > begin_lat type:DOUBLE pos:9, begin_lon type:DOUBLE pos:10, end_lat > type:DOUBLE 
pos:11, end_lon type:DOUBLE pos:12, fare type:DOUBLE > pos:13]2020-09-16T03:41:07,668 INFO LocalJobRunner Map Task Executor #0: > realtime.AbstractRealtimeRecordReader > (AbstractRealtimeRecordReader.java:init(88)) - Writer Schema From Parquet => > [_hoodie_commit_time type:UNION pos:0, _hoodie_commit_seqno type:UNION pos:1, > _hoodie_record_key type:UNION pos:2, _hoodie_partition_path type:UNION pos:3, > _hoodie_file_name type:UNION pos:4, timestamp type:LONG pos:5, _row_key > type:STRING pos:6, rider type:STRING pos:7, driver type:STRING pos:8, > begin_lat type:DOUBLE pos:9, begin_lon type:DOUBLE pos:10, end_lat > type:DOUBLE pos:11, end_lon type:DOUBLE pos:12, fare type:DOUBLE > pos:13]2020-09-16T03:41:07,670 INFO [Thread-465]: mapred.LocalJobRunner > (LocalJobRunner.java:runTasks(483)) - map task executor > complete.2020-09-16T03:41:07,671 WARN [Thread-465]: mapred.LocalJobRunner > (LocalJobRunner.java:run(587)) - job_local242522391_0010java.lang.Exception: > java.io.IOException: org.apache.hudi.exception.HoodieException: Error > ordering fields for storage read. #fieldNames: 4, #fieldPositions: 5 at > org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:489) > ~[hadoop-mapreduce-client-common-2.8.4.jar:?] at > org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:549) > ~[hadoop-mapreduce-client-common-2.8.4.jar:?]Caused by: java.io.IOException: > org.apache.hudi.exception.HoodieException: Error ordering fields for storage > read. 
#fieldNames: 4, #fieldPositions: 5 at > org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97) > ~[hive-exec-2.3.3.jar:2.3.3] at > org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57) > ~[hive-exec-2.3.3.jar:2.3.3] at > org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:379) > ~[hive-exec-2.3.3.jar:2.3.3] at > org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:169) > ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at > org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:432) > ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at > org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) > ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at > org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:270) > ~[hadoop-mapreduce-client-common-2.8.4.jar:?] at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > ~[?:1.8.0_212] at java.util.concurrent.FutureTask.run(FutureTask.java:266) > ~[?:1.8.0_212] at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > [?:1.8.0_212] at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [?:1.8.0_212] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_212]Caused > by: org.apache.hudi.exception.HoodieException: Error ordering fields for > storage read. 
#fieldNames: 4, #fieldPositions: 5 at > org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.orderFields(HoodieRealtimeRecordReaderUtils.java:258) > ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at > org.apache.hudi.hadoop.realtime.AbstractRealtimeRecordReader.init(AbstractRealtimeRecordReader.java:99) > ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at > org.apache.hudi.hadoop.realtime.AbstractRealtimeRecordReader.<init>(AbstractRealtimeRecordReader.java:67) > ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at > org.apache.hudi.hadoop.realtime.RealtimeCompactedRecordReader.<init>(RealtimeCompactedRecordReader.java:53) > ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at > org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.constructRecordReader(HoodieRealtimeRecordReader.java:70) > ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at > org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.<init>(HoodieRealtimeRecordReader.java:47) > ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at > org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat.getRecordReader(HoodieParquetRealtimeInputFormat.java:120) > ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at > org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:376) > ~[hive-exec-2.3.3.jar:2.3.3] at > org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:169) > ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at > org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:432) > ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at > org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) > ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at > org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:270) > ~[hadoop-mapreduce-client-common-2.8.4.jar:?] 
at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > ~[?:1.8.0_212] at java.util.concurrent.FutureTask.run(FutureTask.java:266) > ~[?:1.8.0_212] at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > ~[?:1.8.0_212] at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > ~[?:1.8.0_212] at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_212]{code} > > This issue was first seen while making HiveCombineInputFormat work with > real-time tables; we are now seeing it again. -- This message was sent by Atlassian Jira (v8.3.4#803005)