[ https://issues.apache.org/jira/browse/HUDI-5526?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Brandon Scheller updated HUDI-5526: ----------------------------------- Description: Hive "count" queries fail on hudi bootstrap tables when they are using Hive3. This has been tested on all EMR-6.x releases and fails with the same error. The same query works with Hive2. For example with the query: {code:java} SELECT COUNT(*) FROM HUDI_BOOTSTRAP_TABLE;{code} Gives the following error: {code:java} TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : attempt_1672881902089_0008_1_00_000000_1:java.lang.RuntimeException: java.lang.RuntimeException: java.io.IOException: java.lang.RuntimeException: java.io.IOException: cannot find dir = [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] in pathToPartitionInfo: [ [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one] , [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two] ] at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:296) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:750) Caused by: java.lang.RuntimeException: java.io.IOException: java.lang.RuntimeException: java.io.IOException: cannot find dir = [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] in pathToPartitionInfo: [[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one], [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]] at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:206) at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.<init>(TezGroupedSplitsInputFormat.java:145) at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.getRecordReader(TezGroupedSplitsInputFormat.java:111) at org.apache.tez.mapreduce.lib.MRReaderMapred.setupOldRecordReader(MRReaderMapred.java:157) at org.apache.tez.mapreduce.lib.MRReaderMapred.setSplit(MRReaderMapred.java:83) at org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:703) at org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:662) at org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150) at org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:525) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:171) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:266) ... 
14 more Caused by: java.io.IOException: java.lang.RuntimeException: java.io.IOException: cannot find dir = [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] in pathToPartitionInfo: [[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one], [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]] at org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97) at org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57) at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:421) at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:203) ... 
25 more Caused by: java.lang.RuntimeException: java.io.IOException: cannot find dir = [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] in pathToPartitionInfo: [[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one], [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]] at org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.<init>(VectorizedParquetRecordReader.java:156) at org.apache.hadoop.hive.ql.io.parquet.VectorizedParquetInputFormat.getRecordReader(VectorizedParquetInputFormat.java:50) at org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat.getRecordReader(MapredParquetInputFormat.java:87) at org.apache.hudi.hadoop.HoodieParquetInputFormat.getRecordReader(HoodieParquetInputFormat.java:203) at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:418) ... 
26 more Caused by: java.io.IOException: cannot find dir = [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] in pathToPartitionInfo: [[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one], [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]] at org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:402) at org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:371) at org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:366) at org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx.getPartitionValues(VectorizedRowBatchCtx.java:272) at org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx.getPartitionValues(VectorizedRowBatchCtx.java:263) at org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.initPartitionValues(VectorizedParquetRecordReader.java:164) at org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.<init>(VectorizedParquetRecordReader.java:153) ... 30 more {code} was: Hive "count" queries fail on hudi bootstrap tables when they are using Hive3. This has been tested on all EMR-6.x releases and fails with the same error. The same query works with Hive2. 
For example with the query: {code:java} SELECT COUNT(*) FROM HUDI_BOOTSTRAP_TABLE;{code} Gives the following error: {code:java} TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : attempt_1672881902089_0008_1_00_000000_1:java.lang.RuntimeException: java.lang.RuntimeException: java.io.IOException: java.lang.RuntimeException: java.io.IOException: cannot find dir = [s3://yxchang-emr-dev/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] in pathToPartitionInfo: [ [s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one] , [s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two] ] at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:296) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:750) Caused by: java.lang.RuntimeException: java.io.IOException: 
java.lang.RuntimeException: java.io.IOException: cannot find dir = [s3://yxchang-emr-dev/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] in pathToPartitionInfo: [[s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one], [s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]] at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:206) at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.<init>(TezGroupedSplitsInputFormat.java:145) at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.getRecordReader(TezGroupedSplitsInputFormat.java:111) at org.apache.tez.mapreduce.lib.MRReaderMapred.setupOldRecordReader(MRReaderMapred.java:157) at org.apache.tez.mapreduce.lib.MRReaderMapred.setSplit(MRReaderMapred.java:83) at org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:703) at org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:662) at org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150) at org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:525) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:171) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:266) ... 
14 more Caused by: java.io.IOException: java.lang.RuntimeException: java.io.IOException: cannot find dir = [s3://yxchang-emr-dev/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] in pathToPartitionInfo: [[s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one], [s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]] at [org.apache.hadoop.hive.io|http://org.apache.hadoop.hive.io/].HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97) at [org.apache.hadoop.hive.io|http://org.apache.hadoop.hive.io/].HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57) at [org.apache.hadoop.hive.ql.io|http://org.apache.hadoop.hive.ql.io/].HiveInputFormat.getRecordReader(HiveInputFormat.java:421) at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:203) ... 
25 more Caused by: java.lang.RuntimeException: java.io.IOException: cannot find dir = [s3://yxchang-emr-dev/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] in pathToPartitionInfo: [[s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one], [s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]] at [org.apache.hadoop.hive.ql.io|http://org.apache.hadoop.hive.ql.io/].parquet.vector.VectorizedParquetRecordReader.<init>(VectorizedParquetRecordReader.java:156) at [org.apache.hadoop.hive.ql.io|http://org.apache.hadoop.hive.ql.io/].parquet.VectorizedParquetInputFormat.getRecordReader(VectorizedParquetInputFormat.java:50) at [org.apache.hadoop.hive.ql.io|http://org.apache.hadoop.hive.ql.io/].parquet.MapredParquetInputFormat.getRecordReader(MapredParquetInputFormat.java:87) at org.apache.hudi.hadoop.HoodieParquetInputFormat.getRecordReader(HoodieParquetInputFormat.java:203) at [org.apache.hadoop.hive.ql.io|http://org.apache.hadoop.hive.ql.io/].HiveInputFormat.getRecordReader(HiveInputFormat.java:418) ... 
26 more Caused by: java.io.IOException: cannot find dir = [s3://yxchang-emr-dev/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] in pathToPartitionInfo: [[s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one], [s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]] at [org.apache.hadoop.hive.ql.io|http://org.apache.hadoop.hive.ql.io/].HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:402) at [org.apache.hadoop.hive.ql.io|http://org.apache.hadoop.hive.ql.io/].HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:371) at [org.apache.hadoop.hive.ql.io|http://org.apache.hadoop.hive.ql.io/].HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:366) at org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx.getPartitionValues(VectorizedRowBatchCtx.java:272) at org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx.getPartitionValues(VectorizedRowBatchCtx.java:263) at [org.apache.hadoop.hive.ql.io|http://org.apache.hadoop.hive.ql.io/].parquet.vector.VectorizedParquetRecordReader.initPartitionValues(VectorizedParquetRecordReader.java:164) at [org.apache.hadoop.hive.ql.io|http://org.apache.hadoop.hive.ql.io/].parquet.vector.VectorizedParquetRecordReader.<init>(VectorizedParquetRecordReader.java:153) ... 30 more {code} > Hive "Count" queries don't work with bootstrap tables w/Hive3 > ------------------------------------------------------------- > > Key: HUDI-5526 > URL: https://issues.apache.org/jira/browse/HUDI-5526 > Project: Apache Hudi > Issue Type: Bug > Reporter: Brandon Scheller > Priority: Major > > Hive "count" queries fail on hudi bootstrap tables when they are using Hive3. > This has been tested on all EMR-6.x releases and fails with the same error. > The same query works with Hive2. 
> For example with the query: > {code:java} > SELECT COUNT(*) FROM HUDI_BOOTSTRAP_TABLE;{code} > Gives the following error: > {code:java} > TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : > attempt_1672881902089_0008_1_00_000000_1:java.lang.RuntimeException: > java.lang.RuntimeException: java.io.IOException: java.lang.RuntimeException: > java.io.IOException: cannot find dir = > [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] > in pathToPartitionInfo: [ > [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one] > , > [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two] > ] > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:296) > at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:750) > Caused by: 
java.lang.RuntimeException: java.io.IOException: > java.lang.RuntimeException: java.io.IOException: cannot find dir = > [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] > in pathToPartitionInfo: > [[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one], > [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]] > at > org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:206) > at > org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.<init>(TezGroupedSplitsInputFormat.java:145) > at > org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.getRecordReader(TezGroupedSplitsInputFormat.java:111) > at > org.apache.tez.mapreduce.lib.MRReaderMapred.setupOldRecordReader(MRReaderMapred.java:157) > at > org.apache.tez.mapreduce.lib.MRReaderMapred.setSplit(MRReaderMapred.java:83) > at > org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:703) > at org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:662) > at > org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150) > at org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:525) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:171) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:266) > ... 
14 more > Caused by: java.io.IOException: java.lang.RuntimeException: > java.io.IOException: cannot find dir = > [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] > in pathToPartitionInfo: > [[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one], > [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]] > at > org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97) > at > org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57) > at > org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:421) > at > org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:203) > ... 
25 more > Caused by: java.lang.RuntimeException: java.io.IOException: cannot find dir = > [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] > in pathToPartitionInfo: > [[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one], > [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]] > at > org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.<init>(VectorizedParquetRecordReader.java:156) > at > org.apache.hadoop.hive.ql.io.parquet.VectorizedParquetInputFormat.getRecordReader(VectorizedParquetInputFormat.java:50) > at > org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat.getRecordReader(MapredParquetInputFormat.java:87) > at > org.apache.hudi.hadoop.HoodieParquetInputFormat.getRecordReader(HoodieParquetInputFormat.java:203) > at > org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:418) > ... 
26 more > Caused by: java.io.IOException: cannot find dir = > [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet] > in pathToPartitionInfo: > [[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one], > [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]] > at > org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:402) > at > org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:371) > at > org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:366) > at > org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx.getPartitionValues(VectorizedRowBatchCtx.java:272) > at > org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx.getPartitionValues(VectorizedRowBatchCtx.java:263) > at > org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.initPartitionValues(VectorizedParquetRecordReader.java:164) > at > org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.<init>(VectorizedParquetRecordReader.java:153) > ... 30 more > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)