[ https://issues.apache.org/jira/browse/CARBONDATA-4022?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Kunal Kapoor resolved CARBONDATA-4022.
--------------------------------------
    Fix Version/s: 2.1.1
       Resolution: Fixed

> Getting the error - "PathName is not a valid DFS filename." with index server
> and after adding carbon SDK segments and then doing select/update/delete
> operations.
> ------------------------------------------------------------------------------
>
>                 Key: CARBONDATA-4022
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-4022
>             Project: CarbonData
>          Issue Type: Bug
>    Affects Versions: 2.0.0
>            Reporter: Prasanna Ravichandran
>            Priority: Major
>             Fix For: 2.1.1
>
>          Time Spent: 3h 10m
>  Remaining Estimate: 0h
>
> The error "Pathname ... is not a valid DFS filename." is thrown during select,
> update and delete queries on a table to which carbon SDK segments have been
> added. The path reported in the error is malformed - the table location and the
> fully qualified external segment path are concatenated into one string - and
> this malformed path is the cause of the failure. The problem is seen only when
> the index server is running and index server fallback is disabled.
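For context, the "index server" precondition in the description refers to CarbonData's
distributed index server, which performs segment pruning in a separate application
rather than in the Spark driver. The sketch below is not part of the report; it only
illustrates, with placeholder host/port values, the kind of configuration under which
the reporter says the problem appears. These settings normally live in carbon.properties;
the programmatic form via CarbonProperties is used here just to keep the example
self-contained, and the property names should be verified against the index server
documentation of the deployed CarbonData version.

    import org.apache.carbondata.core.util.CarbonProperties;

    public class IndexServerPrecondition {
      public static void main(String[] args) {
        CarbonProperties props = CarbonProperties.getInstance();
        // Route segment pruning through the distributed index server.
        props.addProperty("carbon.enable.index.server", "true");
        props.addProperty("carbon.index.server.ip", "indexserver-host"); // placeholder
        props.addProperty("carbon.index.server.port", "9999");           // placeholder
        // The report states the failure is seen only when fallback to
        // driver-side pruning is disabled.
        props.addProperty("carbon.disable.index.server.fallback", "true");
      }
    }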
> Queries and errors:
>
> > create table sdk_2level_1(name string, rec1
> > struct<intVal:int,StringVal:array<string>>) stored as carbondata;
> +---------+
> | Result  |
> +---------+
> +---------+
> No rows selected (0.425 seconds)
>
> > alter table sdk_2level_1 add segment
> > options('path'='hdfs://hacluster/sdkfiles/twolevelnestedrecwitharray','format'='carbondata');
> +---------+
> | Result  |
> +---------+
> +---------+
> No rows selected (0.77 seconds)
> > select * from sdk_2level_1;
> INFO : Execution ID: 1855
> Error: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in
> stage 600.0 failed 4 times, most recent failure: Lost task 0.3 in stage 600.0
> (TID 21345, linux, executor 16): java.lang.IllegalArgumentException: Pathname
> /user/hive/warehouse/carbon.store/rps/sdk_2level_1hdfs:/hacluster/sdkfiles/twolevelnestedrecwitharray/part-0-188852617294480_batchno0-0-null-188852332673632.carbondata
> from
> hdfs://hacluster/user/hive/warehouse/carbon.store/rps/sdk_2level_1hdfs:/hacluster/sdkfiles/twolevelnestedrecwitharray/part-0-188852617294480_batchno0-0-null-188852332673632.carbondata
> is not a valid DFS filename.
>     at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:249)
>     at org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:332)
>     at org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:328)
>     at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
>     at org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:340)
>     at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:955)
>     at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getDataInputStream(AbstractDFSCarbonFile.java:316)
>     at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getDataInputStream(AbstractDFSCarbonFile.java:293)
>     at org.apache.carbondata.core.datastore.impl.FileFactory.getDataInputStream(FileFactory.java:198)
>     at org.apache.carbondata.core.datastore.impl.FileFactory.getDataInputStream(FileFactory.java:188)
>     at org.apache.carbondata.core.reader.ThriftReader.open(ThriftReader.java:100)
>     at org.apache.carbondata.core.reader.CarbonHeaderReader.readHeader(CarbonHeaderReader.java:60)
>     at org.apache.carbondata.core.util.DataFileFooterConverterV3.readDataFileFooter(DataFileFooterConverterV3.java:65)
>     at org.apache.carbondata.core.util.CarbonUtil.getDataFileFooter(CarbonUtil.java:902)
>     at org.apache.carbondata.core.util.CarbonUtil.readMetadataFile(CarbonUtil.java:874)
>     at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:216)
>     at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138)
>     at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:382)
>     at org.apache.carbondata.core.scan.executor.impl.DetailQueryExecutor.execute(DetailQueryExecutor.java:47)
>     at org.apache.carbondata.hadoop.CarbonRecordReader.initialize(CarbonRecordReader.java:117)
>     at org.apache.carbondata.spark.rdd.CarbonScanRDD$$anon$1.hasNext(CarbonScanRDD.scala:540)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
>     at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
>     at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:584)
>     at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:301)
>     at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:293)
>     at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:857)
>     at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:857)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>     at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>     at org.apache.spark.scheduler.Task.run(Task.scala:123)
>     at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:410)
>     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1551)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:416)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:748)
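Note the shape of the failing pathname above: it is the table location
(/user/hive/warehouse/carbon.store/rps/sdk_2level_1) with the fully qualified
segment-file URI appended directly to it, after which the double slash of hdfs://
has been normalized to a single slash. The standalone sketch below only illustrates
that symptom with hypothetical variable names; it is not the actual CarbonData code.
It shows how treating an already qualified external path as if it were relative to the
table location produces exactly this invalid DFS filename, while using the external
path as-is keeps it valid.

    import org.apache.hadoop.fs.Path;

    public class MalformedPathIllustration {
      public static void main(String[] args) {
        // Table location and the SDK segment's data file, as they appear in the report.
        String tableLocation =
            "/user/hive/warehouse/carbon.store/rps/sdk_2level_1";
        String externalDataFile =
            "hdfs://hacluster/sdkfiles/twolevelnestedrecwitharray/"
                + "part-0-188852617294480_batchno0-0-null-188852332673632.carbondata";

        // Wrong: appending the qualified URI to the table location. Path normalization
        // collapses "//" to "/", giving ".../sdk_2level_1hdfs:/hacluster/..." - the
        // pathname shape that DistributedFileSystem rejects as not a valid DFS filename.
        Path broken = new Path(tableLocation + externalDataFile);
        System.out.println(broken);

        // Expected: a data file of an added external segment should be addressed by its
        // own qualified path, untouched by the table location.
        Path valid = new Path(externalDataFile);
        System.out.println(valid);
      }
    }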
> > update sdk_2level_1 set (name)=("RAMU") where name="LILY";
> INFO : Execution ID: 1871
> Error: java.lang.RuntimeException: Update operation failed. Job aborted due to stage
> failure: Task 0 in stage 605.0 failed 4 times, most recent failure: Lost task 0.3 in
> stage 605.0 (TID 21357, linux, executor 27): java.lang.IllegalArgumentException: Pathname
> /user/hive/warehouse/carbon.store/rps/sdk_2level_1hdfs:/hacluster/sdkfiles/twolevelnestedrecwitharray/part-0-188852617294480_batchno0-0-null-188852332673632.carbondata
> from
> hdfs://hacluster/user/hive/warehouse/carbon.store/rps/sdk_2level_1hdfs:/hacluster/sdkfiles/twolevelnestedrecwitharray/part-0-188852617294480_batchno0-0-null-188852332673632.carbondata
> is not a valid DFS filename.
>     at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:249)
>     at org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:332)
>     at org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:328)
>     at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
>     at org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:340)
>     at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:955)
>     at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getDataInputStream(AbstractDFSCarbonFile.java:316)
>     at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getDataInputStream(AbstractDFSCarbonFile.java:293)
>     at org.apache.carbondata.core.datastore.impl.FileFactory.getDataInputStream(FileFactory.java:198)
>     at org.apache.carbondata.core.datastore.impl.FileFactory.getDataInputStream(FileFactory.java:188)
>     at org.apache.carbondata.core.reader.ThriftReader.open(ThriftReader.java:100)
>     at org.apache.carbondata.core.reader.CarbonHeaderReader.readHeader(CarbonHeaderReader.java:60)
>     at org.apache.carbondata.core.util.DataFileFooterConverterV3.readDataFileFooter(DataFileFooterConverterV3.java:65)
>     at org.apache.carbondata.core.util.CarbonUtil.getDataFileFooter(CarbonUtil.java:902)
>     at org.apache.carbondata.core.util.CarbonUtil.readMetadataFile(CarbonUtil.java:874)
>     at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:216)
>     at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138)
>     at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:382)
>     at org.apache.carbondata.core.scan.executor.impl.DetailQueryExecutor.execute(DetailQueryExecutor.java:47)
>     at org.apache.carbondata.hadoop.CarbonRecordReader.initialize(CarbonRecordReader.java:117)
>     at org.apache.carbondata.spark.rdd.CarbonScanRDD$$anon$1.hasNext(CarbonScanRDD.scala:540)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
>     at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
>     at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:584)
>     at org.apache.spark.sql.execution.columnar.CachedRDDBuilder$$anonfun$1$$anon$1.hasNext(InMemoryRelation.scala:125)
>     at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)
>     at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:299)
>     at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1325)
>     at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1316)
>     at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1251)
>     at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1316)
>     at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1042)
>     at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:308)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
>     at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
>     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102)
>     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
>     at org.apache.spark.scheduler.Task.run(Task.scala:123)
>     at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:410)
>     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1551)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:416)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:748)
> > create table external_primitive (id int, name string, rank smallint, salary
> > double, active boolean, dob date, doj timestamp, city string, dept string)
> > stored as carbondata;
> +---------+
> | Result  |
> +---------+
> +---------+
> No rows selected (0.448 seconds)
>
> > insert into external_primitive select
> > 1,"Pr",1,10,true,"1992-12-09","1992-10-07 22:00:20.0","chennai","CSE";
> INFO : Execution ID: 1996
> +---------+
> | Result  |
> +---------+
> +---------+
> No rows selected (6.365 seconds)
>
> > select * from external_primitive;
> INFO : Execution ID: 2002
> +-----+-------+-------+---------+---------+-------------+------------------------+----------+-------+
> | id  | name  | rank  | salary  | active  | dob         | doj                    | city     | dept  |
> +-----+-------+-------+---------+---------+-------------+------------------------+----------+-------+
> | 1   | Pr    | 1     | 10.0    | true    | 1992-12-09  | 1992-10-07 22:00:20.0  | chennai  | CSE   |
> +-----+-------+-------+---------+---------+-------------+------------------------+----------+-------+
> 1 row selected (0.856 seconds)
>
> > alter table external_primitive add segment
> > options('path'='hdfs://hacluster/sdkfiles/primitive','format'='carbon');
> +---------+
> | Result  |
> +---------+
> +---------+
> No rows selected (0.529 seconds)
>
> > select count(*) from external_primitive;  --14
> INFO : Execution ID: 2008
> +-----------+
> | count(1)  |
> +-----------+
> | 14        |
> +-----------+
> 1 row selected (0.845 seconds)
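For completeness, the folder added above (hdfs://hacluster/sdkfiles/primitive) would
typically have been produced with the CarbonData file SDK. The sketch below is an
assumption about how such files are written, not part of the report: the output path,
the writtenBy tag and the sample row are placeholders, the schema mirrors the
external_primitive table, and the exact builder methods should be checked against the
SDK of the CarbonData version in use. (The count(*) result of 14 is consistent with the
one inserted row plus 13 rows already present in that folder.)

    import org.apache.carbondata.core.metadata.datatype.DataTypes;
    import org.apache.carbondata.sdk.file.CarbonWriter;
    import org.apache.carbondata.sdk.file.Field;
    import org.apache.carbondata.sdk.file.Schema;

    public class WriteSdkSegment {
      public static void main(String[] args) throws Exception {
        // Schema matching the external_primitive table used in the report.
        Field[] fields = new Field[] {
            new Field("id", DataTypes.INT),
            new Field("name", DataTypes.STRING),
            new Field("rank", DataTypes.SHORT),
            new Field("salary", DataTypes.DOUBLE),
            new Field("active", DataTypes.BOOLEAN),
            new Field("dob", DataTypes.DATE),
            new Field("doj", DataTypes.TIMESTAMP),
            new Field("city", DataTypes.STRING),
            new Field("dept", DataTypes.STRING)
        };

        // The output path is the folder later passed to ALTER TABLE ... ADD SEGMENT.
        CarbonWriter writer = CarbonWriter.builder()
            .outputPath("hdfs://hacluster/sdkfiles/primitive")
            .withCsvInput(new Schema(fields))
            .writtenBy("WriteSdkSegment")
            .build();

        // One sample row for illustration; the real folder contained more rows.
        writer.write(new String[] {"2", "Krithi", "2", "20", "true",
            "1993-01-10", "1993-11-07 22:00:20.0", "bangalore", "IT"});
        writer.close();
      }
    }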
> > delete from external_primitive where id =2;
> INFO : Execution ID: 2010
> Error: java.lang.RuntimeException: Delete data operation is failed. Job aborted due to
> stage failure: Task 0 in stage 685.0 failed 4 times, most recent failure: Lost task 0.3
> in stage 685.0 (TID 21446, linux-13, executor 7): java.lang.IllegalArgumentException: Pathname
> /user/hive/warehouse/carbon.store/rps/external_primitivehdfs:/hacluster/sdkfiles/primitive/part-0-188354604253792_batchno0-0-null-188354313936272.carbondata
> from
> hdfs://hacluster/user/hive/warehouse/carbon.store/rps/external_primitivehdfs:/hacluster/sdkfiles/primitive/part-0-188354604253792_batchno0-0-null-188354313936272.carbondata
> is not a valid DFS filename.
>     at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:249)
>     at org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:332)
>     at org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:328)
>     at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
>     at org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:340)
>     at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:955)
>     at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getDataInputStream(AbstractDFSCarbonFile.java:316)
>     at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getDataInputStream(AbstractDFSCarbonFile.java:293)
>     at org.apache.carbondata.core.datastore.impl.FileFactory.getDataInputStream(FileFactory.java:198)
>     at org.apache.carbondata.core.datastore.impl.FileFactory.getDataInputStream(FileFactory.java:188)
>     at org.apache.carbondata.core.reader.ThriftReader.open(ThriftReader.java:100)
>     at org.apache.carbondata.core.reader.CarbonHeaderReader.readHeader(CarbonHeaderReader.java:60)
>     at org.apache.carbondata.core.util.DataFileFooterConverterV3.readDataFileFooter(DataFileFooterConverterV3.java:65)
>     at org.apache.carbondata.core.util.CarbonUtil.getDataFileFooter(CarbonUtil.java:902)
>     at org.apache.carbondata.core.util.CarbonUtil.readMetadataFile(CarbonUtil.java:874)
>     at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:216)
>     at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138)
>     at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:382)
>     at org.apache.carbondata.core.scan.executor.impl.VectorDetailQueryExecutor.execute(VectorDetailQueryExecutor.java:43)
>     at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.initialize(VectorizedCarbonRecordReader.java:141)
>     at org.apache.carbondata.spark.rdd.CarbonScanRDD$$anon$1.hasNext(CarbonScanRDD.scala:540)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown Source)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
>     at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
>     at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:584)
>     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
>     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
>     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
>     at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:132)
>     at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:58)
>     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102)
>     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
>     at org.apache.spark.scheduler.Task.run(Task.scala:123)
>     at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:410)
>     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1551)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:416)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:748)
> Driver stacktrace: (state=,code=0)



--
This message was sent by Atlassian Jira
(v8.3.4#803005)