[ https://issues.apache.org/jira/browse/HUDI-4526?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Zhaojing Yu updated HUDI-4526:
------------------------------
    Fix Version/s: 0.12.2
                   (was: 0.12.1)

> Improve handling when the spillableMapBasePath disk directory is full
> ---------------------------------------------------------------------
>
>                 Key: HUDI-4526
>                 URL: https://issues.apache.org/jira/browse/HUDI-4526
>             Project: Apache Hudi
>          Issue Type: Improvement
>          Components: spark
>            Reporter: Forward Xu
>            Assignee: sivabalan narayanan
>            Priority: Critical
>              Labels: pull-request-available
>             Fix For: 0.12.2
>
>
> {code:java}
> SLF4J: Class path contains multiple SLF4J bindings.
> SLF4J: Found binding in [jar:file:/data13/yarnenv/local/filecache/72005/spark-jars.zip/slf4j-log4j12-1.7.16.jar!/org/slf4j/impl/StaticLoggerBinder.class]
> SLF4J: Found binding in [jar:file:/data/gaiaadmin/gaiaenv/tdwgaia/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
> SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
> SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
> 22/08/02 19:14:55 ERROR AbstractHoodieLogRecordReader: Got exception when reading log file
> org.apache.hudi.exception.HoodieIOException: Unable to create :/tmp/hudi-BITCASK-092a9065-a2b6-4a72-aff4-23a7072e8064
>       at org.apache.hudi.common.util.collection.ExternalSpillableMap.getDiskBasedMap(ExternalSpillableMap.java:122)
>       at org.apache.hudi.common.util.collection.ExternalSpillableMap.get(ExternalSpillableMap.java:197)
>       at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.processNextDeletedRecord(HoodieMergedLogRecordScanner.java:168)
>       at java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948)
>       at java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:647)
>       at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processQueuedBlocksForInstant(AbstractHoodieLogRecordReader.java:473)
>       at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:343)
>       at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:192)
>       at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:110)
>       at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:103)
>       at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner$Builder.build(HoodieMergedLogRecordScanner.java:324)
>       at org.apache.hudi.HoodieMergeOnReadRDD$.scanLog(HoodieMergeOnReadRDD.scala:370)
>       at org.apache.hudi.HoodieMergeOnReadRDD$LogFileIterator.<init>(HoodieMergeOnReadRDD.scala:171)
>       at org.apache.hudi.HoodieMergeOnReadRDD.compute(HoodieMergeOnReadRDD.scala:92)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>       at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>       at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>       at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>       at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
>       at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
>       at org.apache.spark.scheduler.Task.run(Task.scala:123)
>       at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
>       at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1419)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
>       at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>       at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>       at java.lang.Thread.run(Thread.java:748)
> Caused by: java.io.IOException: Unable to create :/tmp/hudi-BITCASK-092a9065-a2b6-4a72-aff4-23a7072e8064
>       at org.apache.hudi.common.util.FileIOUtils.mkdir(FileIOUtils.java:70)
>       at org.apache.hudi.common.util.collection.DiskMap.<init>(DiskMap.java:55)
>       at org.apache.hudi.common.util.collection.BitCaskDiskMap.<init>(BitCaskDiskMap.java:98)
>       at org.apache.hudi.common.util.collection.ExternalSpillableMap.getDiskBasedMap(ExternalSpillableMap.java:119)
>       ... 33 more
> 22/08/02 19:14:55 ERROR Executor: Exception in task 104.0 in stage 0.0 (TID 104)
> org.apache.hudi.exception.HoodieException: Exception when reading log file
>       at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:352)
>       at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:192)
>       at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:110)
>       at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:103)
>       at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner$Builder.build(HoodieMergedLogRecordScanner.java:324)
>       at org.apache.hudi.HoodieMergeOnReadRDD$.scanLog(HoodieMergeOnReadRDD.scala:370)
>       at org.apache.hudi.HoodieMergeOnReadRDD$LogFileIterator.<init>(HoodieMergeOnReadRDD.scala:171)
>       at org.apache.hudi.HoodieMergeOnReadRDD.compute(HoodieMergeOnReadRDD.scala:92)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>       at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>       at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>       at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>       at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
>       at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
>       at org.apache.spark.scheduler.Task.run(Task.scala:123)
>       at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
>       at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1419)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
>       at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>       at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>       at java.lang.Thread.run(Thread.java:748)
> Caused by: org.apache.hudi.exception.HoodieIOException: Unable to create :/tmp/hudi-BITCASK-092a9065-a2b6-4a72-aff4-23a7072e8064 {code}
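
For context on the trace above: ExternalSpillableMap.getDiskBasedMap calls FileIOUtils.mkdir on the configured spillable map base path (hoodie.memory.spillable.map.path, which defaults to a directory under /tmp), so when that volume is full the mkdir throws and the whole read task dies. Below is a minimal sketch of the kind of improvement this ticket asks for: accepting several candidate directories and picking one that still has free space before spilling. The class, method, and threshold names here are illustrative only, not Hudi APIs, and this is not the actual patch.
{code:java}
import java.io.File;
import java.io.IOException;

public class SpillPathResolver {

    // Illustrative threshold: require at least 1 GiB free before choosing a spill directory.
    private static final long MIN_FREE_BYTES = 1L << 30;

    /**
     * Returns the first candidate directory that exists (or can be created)
     * and still has enough usable space, instead of failing outright with
     * "Unable to create :/tmp/hudi-BITCASK-..." when one disk is full.
     */
    public static File resolveSpillableMapBasePath(String... candidates) throws IOException {
        for (String candidate : candidates) {
            File dir = new File(candidate);
            // Skip candidates that do not exist and cannot be created.
            if (!dir.exists() && !dir.mkdirs()) {
                continue;
            }
            // Only pick a directory that still has headroom for spill files.
            if (dir.getUsableSpace() >= MIN_FREE_BYTES) {
                return dir;
            }
        }
        throw new IOException("No spillable map base path with sufficient free space among candidates");
    }

    public static void main(String[] args) throws IOException {
        // Example: prefer /tmp but fall back to a data disk when /tmp is full.
        File base = resolveSpillableMapBasePath("/tmp", "/data1/tmp");
        System.out.println("Spilling to: " + base.getAbsolutePath());
    }
}
{code}
As an interim workaround, pointing hoodie.memory.spillable.map.path at a volume with enough free space should avoid the failure without any code change.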



--
This message was sent by Atlassian Jira
(v8.20.10#820010)