jiayuasu opened a new issue, #2650:
URL: https://github.com/apache/sedona/issues/2650
Use the DataFrame-based shapefile reader to read:
`s3://wherobots-examples/gdea-course-data/raw-data/53033C_20250330/S_FLD_HAZ_AR.shp`
The read emits an annoying warning:
```
26/02/11 19:16:44 WARN FileStreamSink: Assume no metadata directory. Error while looking for metadata directory in the path: s3://wherobots-examples/gdea-course-data/raw-data/53033C_20250330/S_FLD_HAZ_AR.???.
java.io.FileNotFoundException: No such file or directory: s3://wherobots-examples/gdea-course-data/raw-data/53033C_20250330/S_FLD_HAZ_AR.???
    at org.apache.hadoop.fs.s3a.S3AFileSystem.s3GetFileStatus(S3AFileSystem.java:4156)
    at org.apache.hadoop.fs.s3a.S3AFileSystem.innerGetFileStatus(S3AFileSystem.java:4007)
    at org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$getFileStatus$22(S3AFileSystem.java:3984)
    at org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.invokeTrackingDuration(IOStatisticsBinding.java:547)
    at org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.lambda$trackDurationOfOperation$5(IOStatisticsBinding.java:528)
    at org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration(IOStatisticsBinding.java:449)
    at org.apache.hadoop.fs.s3a.S3AFileSystem.trackDurationAndSpan(S3AFileSystem.java:2865)
    at org.apache.hadoop.fs.s3a.S3AFileSystem.trackDurationAndSpan(S3AFileSystem.java:2884)
    at org.apache.hadoop.fs.s3a.S3AFileSystem.getFileStatus(S3AFileSystem.java:3982)
    at org.apache.spark.sql.execution.streaming.FileStreamSink$.hasMetadata(FileStreamSink.scala:56)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.fileIndex$lzycompute(FileTable.scala:51)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.fileIndex(FileTable.scala:47)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.$anonfun$dataSchema$4(FileTable.scala:72)
    at scala.Option.orElse(Option.scala:477)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema$lzycompute(FileTable.scala:72)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema(FileTable.scala:66)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.schema$lzycompute(FileTable.scala:84)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.schema(FileTable.scala:82)
    at org.apache.spark.sql.connector.catalog.Table.columns(Table.java:65)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.columns(FileTable.scala:38)
    at org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2.inferSchema(FileDataSourceV2.scala:98)
    at org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2.inferSchema$(FileDataSourceV2.scala:95)
    at org.apache.sedona.sql.datasources.shapefile.ShapefileDataSource.inferSchema(ShapefileDataSource.scala:45)
    at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils$.getTableFromProvider(DataSourceV2Utils.scala:96)
    at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils$.loadV2Source(DataSourceV2Utils.scala:147)
    at org.apache.spark.sql.catalyst.analysis.ResolveDataSource$$anonfun$apply$1.$anonfun$applyOrElse$1(ResolveDataSource.scala:60)
    at scala.Option.flatMap(Option.scala:283)
    at org.apache.spark.sql.catalyst.analysis.ResolveDataSource$$anonfun$apply$1.applyOrElse(ResolveDataSource.scala:58)
    at org.apache.spark.sql.catalyst.analysis.ResolveDataSource$$anonfun$apply$1.applyOrElse(ResolveDataSource.scala:45)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsUpWithPruning$3(AnalysisHelper.scala:139)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:86)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsUpWithPruning$1(AnalysisHelper.scala:139)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:416)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUpWithPruning(AnalysisHelper.scala:135)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUpWithPruning$(AnalysisHelper.scala:131)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUpWithPruning(LogicalPlan.scala:37)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUp(AnalysisHelper.scala:112)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUp$(AnalysisHelper.scala:111)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:37)
    at org.apache.spark.sql.catalyst.analysis.ResolveDataSource.apply(ResolveDataSource.scala:45)
    at org.apache.spark.sql.catalyst.analysis.ResolveDataSource.apply(ResolveDataSource.scala:43)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:242)
    at scala.collection.LinearSeqOps.foldLeft(LinearSeq.scala:183)
    at scala.collection.LinearSeqOps.foldLeft$(LinearSeq.scala:179)
    at scala.collection.immutable.List.foldLeft(List.scala:79)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:239)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:231)
    at scala.collection.immutable.List.foreach(List.scala:334)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:231)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:340)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$execute$1(Analyzer.scala:336)
    at org.apache.spark.sql.catalyst.analysis.AnalysisContext$.withNewAnalysisContext(Analyzer.scala:234)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:336)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:299)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:201)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:201)
    at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.resolveInFixedPoint(HybridAnalyzer.scala:190)
    at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.$anonfun$apply$1(HybridAnalyzer.scala:76)
    at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.withTrackedAnalyzerBridgeState(HybridAnalyzer.scala:111)
    at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.apply(HybridAnalyzer.scala:71)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:330)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:423)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:330)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyAnalyzed$2(QueryExecution.scala:110)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:148)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:278)
    at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:654)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:278)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
    at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:277)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyAnalyzed$1(QueryExecution.scala:110)
    at scala.util.Try$.apply(Try.scala:217)
    at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1378)
    at org.apache.spark.util.LazyTry.tryT$lzycompute(LazyTry.scala:46)
    at org.apache.spark.util.LazyTry.tryT(LazyTry.scala:46)
    at org.apache.spark.util.LazyTry.get(LazyTry.scala:58)
    at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:121)
    at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:80)
    at org.apache.spark.sql.classic.Dataset$.$anonfun$ofRows$1(Dataset.scala:115)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
    at org.apache.spark.sql.classic.Dataset$.ofRows(Dataset.scala:113)
    at org.apache.spark.sql.classic.DataFrameReader.load(DataFrameReader.scala:109)
    at org.apache.spark.sql.classic.DataFrameReader.load(DataFrameReader.scala:100)
    at org.apache.spark.sql.classic.DataFrameReader.load(DataFrameReader.scala:58)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
    at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.base/java.lang.reflect.Method.invoke(Method.java:569)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:184)
    at py4j.ClientServerConnection.run(ClientServerConnection.java:108)
    at java.base/java.lang.Thread.run(Thread.java:840)
```