[ https://issues.apache.org/jira/browse/HUDI-4526?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Zhaojing Yu updated HUDI-4526:
------------------------------
    Fix Version/s: 0.12.2
                       (was: 0.12.1)

> Improve handling when the spillableMapBasePath disk directory is full
> ----------------------------------------------------------------------
>
>                 Key: HUDI-4526
>                 URL: https://issues.apache.org/jira/browse/HUDI-4526
>             Project: Apache Hudi
>          Issue Type: Improvement
>          Components: spark
>            Reporter: Forward Xu
>            Assignee: sivabalan narayanan
>            Priority: Critical
>              Labels: pull-request-available
>             Fix For: 0.12.2
>
>
> {code:java}
> // code placeholder
> SLF4J: Class path contains multiple SLF4J bindings.
> SLF4J: Found binding in [jar:file:/data13/yarnenv/local/filecache/72005/spark-jars.zip/slf4j-log4j12-1.7.16.jar!/org/slf4j/impl/StaticLoggerBinder.class]
> SLF4J: Found binding in [jar:file:/data/gaiaadmin/gaiaenv/tdwgaia/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
> SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
> SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
> 22/08/02 19:14:55 ERROR AbstractHoodieLogRecordReader: Got exception when reading log file
> org.apache.hudi.exception.HoodieIOException: Unable to create :/tmp/hudi-BITCASK-092a9065-a2b6-4a72-aff4-23a7072e8064
>   at org.apache.hudi.common.util.collection.ExternalSpillableMap.getDiskBasedMap(ExternalSpillableMap.java:122)
>   at org.apache.hudi.common.util.collection.ExternalSpillableMap.get(ExternalSpillableMap.java:197)
>   at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.processNextDeletedRecord(HoodieMergedLogRecordScanner.java:168)
>   at java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948)
>   at java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:647)
>   at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processQueuedBlocksForInstant(AbstractHoodieLogRecordReader.java:473)
>   at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:343)
>   at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:192)
>   at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:110)
>   at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:103)
>   at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner$Builder.build(HoodieMergedLogRecordScanner.java:324)
>   at org.apache.hudi.HoodieMergeOnReadRDD$.scanLog(HoodieMergeOnReadRDD.scala:370)
>   at org.apache.hudi.HoodieMergeOnReadRDD$LogFileIterator.<init>(HoodieMergeOnReadRDD.scala:171)
>   at org.apache.hudi.HoodieMergeOnReadRDD.compute(HoodieMergeOnReadRDD.scala:92)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>   at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
>   at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
>   at org.apache.spark.scheduler.Task.run(Task.scala:123)
>   at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
>   at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1419)
>   at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>   at java.lang.Thread.run(Thread.java:748)
> Caused by: java.io.IOException: Unable to create :/tmp/hudi-BITCASK-092a9065-a2b6-4a72-aff4-23a7072e8064
>   at org.apache.hudi.common.util.FileIOUtils.mkdir(FileIOUtils.java:70)
>   at org.apache.hudi.common.util.collection.DiskMap.<init>(DiskMap.java:55)
>   at org.apache.hudi.common.util.collection.BitCaskDiskMap.<init>(BitCaskDiskMap.java:98)
>   at org.apache.hudi.common.util.collection.ExternalSpillableMap.getDiskBasedMap(ExternalSpillableMap.java:119)
>   ... 33 more
> 22/08/02 19:14:55 ERROR Executor: Exception in task 104.0 in stage 0.0 (TID 104)
> org.apache.hudi.exception.HoodieException: Exception when reading log file
>   at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:352)
>   at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:192)
>   at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:110)
>   at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:103)
>   at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner$Builder.build(HoodieMergedLogRecordScanner.java:324)
>   at org.apache.hudi.HoodieMergeOnReadRDD$.scanLog(HoodieMergeOnReadRDD.scala:370)
>   at org.apache.hudi.HoodieMergeOnReadRDD$LogFileIterator.<init>(HoodieMergeOnReadRDD.scala:171)
>   at org.apache.hudi.HoodieMergeOnReadRDD.compute(HoodieMergeOnReadRDD.scala:92)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>   at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
>   at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
>   at org.apache.spark.scheduler.Task.run(Task.scala:123)
>   at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
>   at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1419)
>   at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>   at java.lang.Thread.run(Thread.java:748)
> Caused by: org.apache.hudi.exception.HoodieIOException: Unable to create :/tmp/hudi-BITCASK-092a9065-a2b6-4a72-aff4-23a7072e8064
> {code}
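The failure comes from BitCaskDiskMap trying to create its spill directory under the configured spillableMapBasePath (the hoodie.memory.spillable.map.path config, which defaults to /tmp/) and failing hard when that disk is full. Until the improvement lands, a practical workaround is to point the spill path at a volume with free space. A minimal sketch, assuming the option is forwarded through the Spark DataSource read options; the paths below are example values, not defaults:

{code:java}
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class SpillPathWorkaround {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("hudi-spill-path-workaround")
        .getOrCreate();

    // Point the external spillable map at a disk with free space instead of
    // the default /tmp. "/data1/hudi/spill" and the table path are examples.
    Dataset<Row> df = spark.read()
        .format("hudi")
        .option("hoodie.memory.spillable.map.path", "/data1/hudi/spill")
        .load("/path/to/hudi/table");

    df.show(10);
  }
}
{code}

Note that the directory must exist (or be creatable) and be writable on every executor, since the stack trace shows the spill happening inside the merge-on-read log scanner on executor tasks.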
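As for the improvement itself: per the Caused-by frames above, DiskMap's constructor calls FileIOUtils.mkdir and throws on the first failure. A hypothetical sketch of a more forgiving approach, probing the configured directory first and falling back to alternatives before giving up; pickSpillDir and the candidate paths are illustrative only, not the actual Hudi patch:

{code:java}
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

public class SpillDirFallback {

  // Return the first candidate directory that exists (or can be created)
  // and is writable, instead of failing hard on the first mkdir error.
  static File pickSpillDir(List<String> candidates) throws IOException {
    for (String path : candidates) {
      File dir = new File(path);
      if ((dir.isDirectory() || dir.mkdirs()) && dir.canWrite()) {
        return dir;
      }
    }
    throw new IOException("Unable to create a spill directory from candidates: " + candidates);
  }

  public static void main(String[] args) throws IOException {
    // Configured base path first, then fallbacks on other disks (example paths).
    File spillDir = pickSpillDir(Arrays.asList("/tmp/hudi-spill", "/data1/tmp/hudi-spill"));
    System.out.println("Spilling to: " + spillDir.getAbsolutePath());
  }
}
{code}

A real fix would likely also check File.getUsableSpace() on each candidate, since mkdir can still succeed on a disk that is too full to hold the spilled records.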